git.saurik.com Git - apple/xnu.git/commitdiff
commit xnu-1504.3.12 (snapshot: xnu-1504.3.12.tar.gz; tags: mac-os-x-1063, v1504.3.12)
author     Apple <opensource@apple.com>   Mon, 29 Mar 2010 19:14:35 +0000 (19:14 +0000)
committer  Apple <opensource@apple.com>   Mon, 29 Mar 2010 19:14:35 +0000 (19:14 +0000)
150 files changed:
bsd/conf/MASTER
bsd/conf/MASTER.i386
bsd/conf/MASTER.ppc
bsd/conf/MASTER.x86_64
bsd/conf/files
bsd/dev/i386/sysctl.c
bsd/hfs/hfs.h
bsd/hfs/hfs_cnode.c
bsd/hfs/hfs_cnode.h
bsd/hfs/hfs_readwrite.c
bsd/hfs/hfs_vfsops.c
bsd/hfs/hfs_vfsutils.c
bsd/hfs/hfs_vnops.c
bsd/hfs/hfs_xattr.c
bsd/hfs/hfscommon/BTree/BTreeAllocate.c
bsd/kern/imageboot.c
bsd/kern/kdebug.c
bsd/kern/kern_event.c
bsd/kern/kern_memorystatus.c
bsd/kern/kern_mman.c
bsd/kern/kern_resource.c
bsd/kern/kern_sysctl.c
bsd/kern/pthread_synch.c
bsd/kern/sys_generic.c
bsd/kern/syscalls.master
bsd/kern/uipc_usrreq.c
bsd/net/Makefile
bsd/net/bridge.c [deleted file]
bsd/net/bridge.h [deleted file]
bsd/net/bridgestp.c [new file with mode: 0644]
bsd/net/dlil.c
bsd/net/ether_at_pr_module.c
bsd/net/ether_if_module.c
bsd/net/ether_inet6_pr_module.c
bsd/net/ether_inet_pr_module.c
bsd/net/ethernet.h
bsd/net/if.h
bsd/net/if_bridge.c [new file with mode: 0644]
bsd/net/if_bridgevar.h [new file with mode: 0644]
bsd/net/if_ethersubr.c
bsd/net/if_llc.h
bsd/net/if_types.h
bsd/net/if_var.h
bsd/net/if_vlan.c
bsd/net/pf.c
bsd/net/pf_ioctl.c
bsd/net/pfvar.h
bsd/net/route.c
bsd/netinet/in_arp.c
bsd/netinet/ip_dummynet.c
bsd/netinet/ip_output.c
bsd/netinet/tcp_input.c
bsd/netinet6/in6.c
bsd/netinet6/in6_ifattach.c
bsd/netinet6/in6_proto.c
bsd/netinet6/ip6_input.c
bsd/netinet6/ip6_mroute.c
bsd/netinet6/ip6_mroute.h
bsd/netinet6/ip6_output.c
bsd/netinet6/ip6_var.h
bsd/netinet6/ipsec.c
bsd/netinet6/mld6.c
bsd/netinet6/nd6.c
bsd/netinet6/raw_ip6.c
bsd/nfs/nfs_socket.c
bsd/sys/buf_internal.h
bsd/sys/kern_memorystatus.h
bsd/sys/mount.h
bsd/sys/mount_internal.h
bsd/sys/pthread_internal.h
bsd/sys/resource.h
bsd/sys/socketvar.h
bsd/sys/sockio.h
bsd/sys/ubc_internal.h
bsd/sys/vnode_internal.h
bsd/vfs/vfs_bio.c
bsd/vfs/vfs_cluster.c
bsd/vfs/vfs_conf.c
bsd/vfs/vfs_subr.c
bsd/vfs/vfs_syscalls.c
config/Makefile
config/MasterVersion
config/generate_linker_exports.sh [new file with mode: 0755]
iokit/Kernel/IODMACommand.cpp
iokit/Kernel/IOServicePM.cpp
kgmacros
libkern/c++/OSKext.cpp
libkern/conf/MASTER
libkern/conf/files
libkern/kxld/Makefile
libkern/kxld/kxld_array.c
libkern/kxld/kxld_demangle.c [new file with mode: 0644]
libkern/kxld/kxld_demangle.h [new file with mode: 0644]
libkern/kxld/kxld_kext.c
libkern/kxld/kxld_util.h
libkern/kxld/kxld_vtable.c
libkern/libkern/OSAtomic.h
libkern/libkern/c++/OSKext.h
libkern/mkext.c
libkern/zlib/adler32.c
libkern/zlib/arm/adler32vec.s [new file with mode: 0644]
libkern/zlib/arm/inffastS.s [new file with mode: 0644]
libkern/zlib/inffast.c
makedefs/MakeInc.def
makedefs/MakeInc.rule
osfmk/conf/MASTER
osfmk/console/panic_dialog.c
osfmk/console/video_console.c
osfmk/i386/AT386/model_dep.c
osfmk/i386/cpu_capabilities.h
osfmk/i386/cpuid.c
osfmk/i386/cpuid.h
osfmk/i386/lapic.c
osfmk/i386/loose_ends.c
osfmk/i386/pmCPU.c
osfmk/i386/pmCPU.h
osfmk/i386/pmap.c
osfmk/i386/pmap.h
osfmk/i386/pmap_internal.h
osfmk/i386/pmap_x86_common.c
osfmk/ipc/ipc_kmsg.c
osfmk/ipc/ipc_kmsg.h
osfmk/ipc/ipc_port.c
osfmk/ipc/ipc_port.h
osfmk/ipc/mach_port.c
osfmk/kdp/kdp.c
osfmk/kdp/kdp_dyld.h [new file with mode: 0644]
osfmk/kdp/kdp_udp.c
osfmk/kern/debug.c
osfmk/kern/debug.h
osfmk/kern/processor.c
osfmk/kern/processor.h
osfmk/kern/sched.h
osfmk/kern/sched_prim.c
osfmk/kern/sched_prim.h
osfmk/kern/task_policy.c
osfmk/kern/thread.c
osfmk/kern/thread.h
osfmk/kern/thread_call.c
osfmk/mach/task_policy.h
osfmk/mach/vm_prot.h
osfmk/ppc/machine_routines.c
osfmk/vm/vm_fault.c
osfmk/vm/vm_map.c
osfmk/vm/vm_map.h
osfmk/x86_64/loose_ends.c
osfmk/x86_64/pmap.c
pexpert/gen/bootargs.c
pexpert/i386/pe_init.c
pexpert/pexpert/pexpert.h

diff --git a/bsd/conf/MASTER b/bsd/conf/MASTER
index ec9ff0940714cc0501776ebf2d6655a06ffd386d..36c667094d078a4106b3a0bcfbb8c15e25d3d3ae 100644
@@ -195,6 +195,7 @@ options             QUOTA           # file system quotas            # <quota>
 options                REV_ENDIAN_FS   # Reverse Endian FS             # <revfs>
 options                NAMEDSTREAMS    # named stream vnop support     # <namedstreams>
 options                CONFIG_VOLFS    # volfs path support (legacy)   # <config_volfs>
+options                CONFIG_IMGSRC_ACCESS # source of imageboot dmg  # <config_imgsrc_access>
 
 #
 # NFS support
@@ -245,6 +246,8 @@ options                     randomipid              # <inet,randomipid>
 
 options                ZLIB            # inflate/deflate support       # <zlib>
 
+options                IF_BRIDGE                       # <if_bridge>
+
 makeoptions    LIBDRIVER = "libDriver_kern.o"                  # <libdriver>
 makeoptions    LIBOBJC   = "libkobjc.o"                        # <kernobjc>
 
@@ -306,6 +309,9 @@ options   CONFIG_VFS_NAMES=4096             # <medium>
 options   CONFIG_VFS_NAMES=3072                # <small,xsmall>
 options   CONFIG_VFS_NAMES=2048                # <bsmall>
 
+options   CONFIG_MAX_CLUSTERS=8                # <xlarge,large,medium>
+options   CONFIG_MAX_CLUSTERS=4                # <small,xsmall,bsmall>
+
 #
 #  configurable kauth credential related resources 
 #
@@ -409,6 +415,10 @@ options   CONFIG_EMBEDDED                  # <config_embedded>
 #
 options   CONFIG_ENFORCE_SIGNED_CODE           # <config_embedded>
 
+# support dynamic signing of code
+#
+options                CONFIG_DYNAMIC_CODE_SIGNING     # <dynamic_codesigning>
+
 #
 # code decryption... used on embedded for app protection
 # must be set in all the bsd/conf and osfmk/conf MASTER files
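
The options added above follow the usual MASTER-file convention: an "options NAME  # <tag>" entry becomes a preprocessor define in the generated per-configuration headers whenever its tag is part of the selected configuration string. A minimal sketch of how kernel code typically consumes such an option, assuming the generated CONFIG_DYNAMIC_CODE_SIGNING define (the helper function below is hypothetical, not part of this commit):

    /* Hypothetical consumer of a MASTER-file option.  CONFIG_DYNAMIC_CODE_SIGNING
     * is defined by the generated config header only for variants that select
     * the <dynamic_codesigning> tag. */
    #if CONFIG_DYNAMIC_CODE_SIGNING
    static int dynamic_signing_enabled(void) { return 1; }  /* compiled in */
    #else
    static int dynamic_signing_enabled(void) { return 0; }  /* option disabled */
    #endif
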
diff --git a/bsd/conf/MASTER.i386 b/bsd/conf/MASTER.i386
index 08eca2cbc943143890692ab2f963f8441dffd38c..b953aaed944fc41fc863b6b59b97fac929a9e93c 100644
@@ -45,8 +45,8 @@
 #  Standard Apple Research Configurations:
 #  -------- ----- -------- ---------------
 #  BASE =        [ intel mach medium config_dtrace vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue psynch ]
-#  FILESYS =    [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo union config_volfs hfs_compression ]
-#  NETWORKING =  [ inet inet6 compat_oldsock mrouting tcpdrop_synfin bpfilter ipdivert ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile netmibs bond vlan gif stf zlib randomipid ifnet_input_chk config_mbuf_jumbo ipflow ]
+#  FILESYS =    [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo union config_volfs hfs_compression config_imgsrc_access ]
+#  NETWORKING =  [ inet inet6 compat_oldsock tcpdrop_synfin bpfilter ipdivert ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile netmibs bond vlan gif stf zlib randomipid ifnet_input_chk config_mbuf_jumbo ipflow ]
 #  NFS =         [ nfsclient nfsserver ]
 #  VPN =         [ ipsec ]
 #  RELEASE =     [ BASE NETWORKING NFS VPN FILESYS libdriver ]
@@ -56,7 +56,7 @@
 #
 #  EMBEDDED_BASE =     [ intel mach bsmall vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue psynch ]
 #  EMBEDDED_FILESYS =  [ devfs hfs journaling fdesc fifo ]
-#  EMBEDDED_NET =      [ inet compat_oldsock mrouting tcpdrop_synfin bpfilter config_mbuf_noexpand ]
+#  EMBEDDED_NET =      [ inet compat_oldsock tcpdrop_synfin bpfilter config_mbuf_noexpand ]
 #  EMBEDDED =          [ EMBEDDED_BASE EMBEDDED_NET VPN EMBEDDED_FILESYS libdriver no_printf_str no_kprintf_str no_kdebug ]
 #  DEVELOPMENT =       [ EMBEDDED_BASE EMBEDDED_NET NFS VPN EMBEDDED_FILESYS libdriver netmibs development mach_assert config_dtrace ]
 #
diff --git a/bsd/conf/MASTER.ppc b/bsd/conf/MASTER.ppc
index 2a084643342a11c4790b20dbe7a6091f52e878af..54ba3d5658f1db8673b80d75f3506ee5ef6858dd 100644
@@ -47,7 +47,7 @@
 #
 #  BASE =        [ ppc mach medium config_dtrace vol pst gdb noprofiling simple_clock kernstack sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue ]
 #  FILESYS =    [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo union config_volfs hfs_compression ]
-#  NETWORKING =  [ inet inet6 compat_oldsock mrouting tcpdrop_synfin bpfilter ipdivert ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile netmibs bond vlan gif stf zlib randomipid ifnet_input_chk ipflow ]
+#  NETWORKING =  [ inet inet6 compat_oldsock tcpdrop_synfin bpfilter ipdivert ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile netmibs bond vlan gif stf zlib randomipid ifnet_input_chk ipflow ]
 #  NFS =         [ nfsclient nfsserver ]
 #  VPN =         [ ipsec ]
 #  RELEASE =     [ BASE NETWORKING NFS VPN FILESYS libdriver ]
diff --git a/bsd/conf/MASTER.x86_64 b/bsd/conf/MASTER.x86_64
index dd1f24e9691951ae5a76f49448e3bab45b7211c3..3815e81f0814706dfc6267ccca35cfc17283e4a2 100644
@@ -45,8 +45,8 @@
 #  Standard Apple Research Configurations:
 #  -------- ----- -------- ---------------
 #  BASE =        [ intel mach medium config_dtrace vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue psynch ]
-#  FILESYS =    [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo union config_volfs hfs_compression ]
-#  NETWORKING =  [ inet inet6 compat_oldsock mrouting tcpdrop_synfin bpfilter ipdivert ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile netmibs bond vlan gif stf zlib randomipid ifnet_input_chk config_mbuf_jumbo ipflow ]
+#  FILESYS =    [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo union config_volfs hfs_compression config_imgsrc_access ]
+#  NETWORKING =  [ inet inet6 compat_oldsock tcpdrop_synfin bpfilter ipdivert ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile netmibs bond vlan gif stf zlib randomipid ifnet_input_chk config_mbuf_jumbo ipflow ]
 #  NFS =         [ nfsclient nfsserver ]
 #  VPN =         [ ipsec ]
 #  RELEASE =     [ BASE NETWORKING NFS VPN FILESYS libdriver ]
@@ -56,7 +56,7 @@
 #
 #  EMBEDDED_BASE =     [ intel mach bsmall vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue psynch ]
 #  EMBEDDED_FILESYS =  [ devfs hfs journaling fdesc fifo ]
-#  EMBEDDED_NET =      [ inet compat_oldsock mrouting tcpdrop_synfin bpfilter config_mbuf_noexpand ]
+#  EMBEDDED_NET =      [ inet compat_oldsock tcpdrop_synfin bpfilter config_mbuf_noexpand ]
 #  EMBEDDED =          [ EMBEDDED_BASE EMBEDDED_NET VPN EMBEDDED_FILESYS libdriver no_printf_str no_kprintf_str no_kdebug ]
 #  DEVELOPMENT =       [ EMBEDDED_BASE EMBEDDED_NET NFS VPN EMBEDDED_FILESYS libdriver netmibs development mach_assert ]
 #
diff --git a/bsd/conf/files b/bsd/conf/files
index 95f856c21f260df3046830c93f4707b22740b30f..fce436ec6129ddda73ba3526163a28fcc8c9e753 100644
@@ -107,7 +107,7 @@ OPTIONS/ipfw2                               optional ipfw2
 OPTIONS/ipfirewall                     optional ipfirewall
 OPTIONS/ipv6firewall           optional ipv6firewall
 OPTIONS/tcpdebug                       optional tcpdebug
-OPTIONS/bridge                         optional bridge
+OPTIONS/if_bridge                      optional if_bridge
 OPTIONS/faith                          optional faith
 OPTIONS/gif                                    optional gif
 OPTIONS/netat                          optional netat
@@ -200,7 +200,8 @@ bsd/kern/decmpfs.c                  standard
 
 bsd/net/bpf.c                          optional bpfilter
 bsd/net/bpf_filter.c                   optional bpfilter
-bsd/net/bridge.c                       optional bridge
+bsd/net/if_bridge.c                    optional if_bridge
+bsd/net/bridgestp.c                    optional if_bridge
 bsd/net/bsd_comp.c                     optional ppp_bsdcomp
 bsd/net/if.c                           optional networking
 bsd/net/if_atmsubr.c                   optional atm
diff --git a/bsd/dev/i386/sysctl.c b/bsd/dev/i386/sysctl.c
index 2b9609d530dff3b88078cf3283bb972e9903ad32..597a208c1714bf6643c778fb7b85e6c8438a8d81 100644
@@ -314,6 +314,12 @@ SYSCTL_PROC(_machdep_cpu_thermal, OID_AUTO, dynamic_acceleration,
            sizeof(boolean_t),
            cpu_thermal, "I", "Dynamic Acceleration Technology (Turbo Mode)");
 
+SYSCTL_PROC(_machdep_cpu_thermal, OID_AUTO, invariant_APIC_timer,
+           CTLTYPE_INT | CTLFLAG_RD, 
+           (void *)offsetof(cpuid_thermal_leaf_t, invariant_APIC_timer),
+           sizeof(boolean_t),
+           cpu_thermal, "I", "Invariant APIC Timer");
+
 SYSCTL_PROC(_machdep_cpu_thermal, OID_AUTO, thresholds,
            CTLTYPE_INT | CTLFLAG_RD, 
            (void *)offsetof(cpuid_thermal_leaf_t, thresholds),
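
Once running on a kernel built from this revision, the new OID is published under the existing machdep.cpu.thermal node and can be read from user space with sysctlbyname(3). A minimal sketch (error handling trimmed; the OID is absent on kernels older than this commit):

    #include <stdio.h>
    #include <sys/types.h>
    #include <sys/sysctl.h>

    int
    main(void)
    {
            int val = 0;              /* boolean_t is an int on i386/x86_64 */
            size_t len = sizeof(val);

            /* Reads the value exposed by the SYSCTL_PROC added above. */
            if (sysctlbyname("machdep.cpu.thermal.invariant_APIC_timer",
                &val, &len, NULL, 0) == -1) {
                    perror("sysctlbyname");
                    return 1;
            }
            printf("Invariant APIC Timer: %d\n", val);
            return 0;
    }
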
diff --git a/bsd/hfs/hfs.h b/bsd/hfs/hfs.h
index 67ecb6d1ee40219ba0bf70f86e0c973dfc13a3a5..beb10099f697171fad4420d58552dc58c2a59459 100644
@@ -755,7 +755,7 @@ extern int hfs_btsync(struct vnode *vp, int sync_transaction);
 extern void replace_desc(struct cnode *cp, struct cat_desc *cdp);
 
 extern int hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp,
-                       struct vnode **rvpp, int can_drop_lock);
+                       struct vnode **rvpp, int can_drop_lock, int error_on_unlinked);
 
 extern int hfs_update(struct vnode *, int);
 
diff --git a/bsd/hfs/hfs_cnode.c b/bsd/hfs/hfs_cnode.c
index 7123f603fe7d6317ba9a7448de1426280959b309..c17c8d4ddbe0ac5bf2eb1d81727ad78b824507c7 100644
@@ -59,6 +59,10 @@ static void  hfs_reclaim_cnode(struct cnode *);
 
 static int hfs_isordered(struct cnode *, struct cnode *);
 
+inline int hfs_checkdeleted (struct cnode *cp) {
+       return ((cp->c_flag & (C_DELETED | C_NOEXISTS)) ? ENOENT : 0);
+}
+
 
 /*
  * Last reference to an cnode.  If necessary, write or delete it.
@@ -195,7 +199,7 @@ hfs_vnop_inactive(struct vnop_inactive_args *ap)
                if ((cp->c_blocks > 0) && (forkcount == 1) && (vp != cp->c_rsrc_vp)) {
                        struct vnode *rvp = NULLVP;
 
-                       error = hfs_vgetrsrc(hfsmp, vp, &rvp, FALSE);
+                       error = hfs_vgetrsrc(hfsmp, vp, &rvp, FALSE, FALSE);
                        if (error)
                                goto out;
                        /*
@@ -612,9 +616,15 @@ hfs_getnewvnode(
                return (ENOENT);
        }
 
-       /* Hardlinks may need an updated catalog descriptor */
-       if ((cp->c_flag & C_HARDLINK) && descp->cd_nameptr && descp->cd_namelen > 0) {
-               replace_desc(cp, descp);
+       /* 
+        * Hardlinks may need an updated catalog descriptor.  However, if
+        * the cnode has already been marked as open-unlinked (C_DELETED), then don't
+        * replace its descriptor. 
+        */
+       if (!(hfs_checkdeleted(cp))) {
+               if ((cp->c_flag & C_HARDLINK) && descp->cd_nameptr && descp->cd_namelen > 0) {
+                       replace_desc(cp, descp);
+               }
        }
        /* Check if we found a matching vnode */
        if (*vpp != NULL)
diff --git a/bsd/hfs/hfs_cnode.h b/bsd/hfs/hfs_cnode.h
index 27c1b9a55c348d3a89e9fc094bc8cc67571bb4d2..9ffb9a8ca277bc95f178e61f1001b5bf863fad5e 100644
@@ -227,6 +227,16 @@ enum { kFinderInvisibleMask = 1 << 14 };
                         FTOC(fp)->c_rsrc_vp :                  \
                         FTOC(fp)->c_vp)
 
+/*
+ * This is a helper function used for determining whether or not a cnode has become open
+ * unlinked in between the time we acquired its vnode and the time we acquire the cnode lock
+ * to start manipulating it.  Due to the SMP nature of VFS, it is probably necessary to 
+ * use this macro every time we acquire a cnode lock, as the content of the Cnode may have
+ * been modified in betweeen the lookup and a VNOP.  Whether or not to call this is dependent
+ * upon the VNOP in question.  Sometimes it is OK to use an open-unlinked file, for example, in,
+ * reading.  But other times, such as on the source of a VNOP_RENAME, it should be disallowed.
+ */
+int hfs_checkdeleted (struct cnode *cp);
 
 /*
  * Test for a resource fork
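
The comment block above describes the intended call pattern: a VNOP that re-acquires the cnode lock must revalidate the cnode before manipulating it, since it may have gone open-unlinked while unlocked. A minimal sketch of that pattern, assuming the usual hfs_lock()/hfs_unlock() helpers (the VNOP body itself is hypothetical, not part of this commit):

    /* Hypothetical VNOP fragment illustrating the hfs_checkdeleted() pattern. */
    static int
    hfs_vnop_example(struct vnode *vp)
    {
            struct cnode *cp = VTOC(vp);
            int error;

            if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK)))
                    return (error);

            /* The cnode may have been deleted between lookup and lock. */
            if ((error = hfs_checkdeleted(cp))) {
                    hfs_unlock(cp);
                    return (error);         /* ENOENT */
            }

            /* ... safe to use the cnode here ... */
            hfs_unlock(cp);
            return (0);
    }
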
diff --git a/bsd/hfs/hfs_readwrite.c b/bsd/hfs/hfs_readwrite.c
index 6dc30afad3270c1f85516de58e350d3a72a36f44..97578830da10d984f4270e50b54825fef5df0448 100644
@@ -3296,6 +3296,7 @@ hfs_vnop_pageout(struct vnop_pageout_args *ap)
        vm_offset_t     a_pl_offset;
        int             a_flags;
        int is_pageoutv2 = 0;
+       kern_return_t kret;
 
        cp = VTOC(vp);
        fp = VTOF(vp);
@@ -3339,9 +3340,9 @@ hfs_vnop_pageout(struct vnop_pageout_args *ap)
                else {
                        request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY;
                }
-               ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags); 
+               kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags); 
 
-               if (upl == (upl_t) NULL) {
+               if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
                        retval = EINVAL;
                        goto pageout_done;
                }
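
The fix above checks the kern_return_t from ubc_create_upl() in addition to the returned UPL pointer, so an error return can no longer be mistaken for success. The distilled pattern, as a standalone sketch (vp, f_offset, size, and request_flags stand in for the caller's values):

    upl_t upl = NULL;
    upl_page_info_t *pl = NULL;
    kern_return_t kret;

    kret = ubc_create_upl(vp, f_offset, size, &upl, &pl, request_flags);
    if ((kret != KERN_SUCCESS) || (upl == (upl_t)NULL)) {
            return (EINVAL);        /* never touch upl/pl on a failure path */
    }
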
diff --git a/bsd/hfs/hfs_vfsops.c b/bsd/hfs/hfs_vfsops.c
index 8148697b213dbbd6445697cb6efa64e87c2f0ec7..de087422b7e6bcdafa190b03879de361067c3e41 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 1999-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -114,6 +114,8 @@ int hfs_dbg_all = 0;
 int hfs_dbg_err = 0;
 #endif
 
+/* Enable/disable debugging code for live volume resizing */
+int hfs_resize_debug = 0;
 
 lck_grp_attr_t *  hfs_group_attr;
 lck_attr_t *  hfs_lock_attr;
@@ -146,8 +148,7 @@ static int hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context);
 static int hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, vfs_context_t context);
 
 static int hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t reclaimblks, vfs_context_t context);
-static int hfs_overlapped_overflow_extents(struct hfsmount *hfsmp, u_int32_t startblk,
-                                           u_int32_t catblks, u_int32_t fileID, int rsrcfork);
+static int hfs_overlapped_overflow_extents(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t fileID);
 static int hfs_journal_replay(vnode_t devvp, vfs_context_t context);
 
 
@@ -3803,17 +3804,18 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
        u_int32_t reclaimblks = 0;
        int lockflags = 0;
        int transaction_begun = 0;
+       Boolean updateFreeBlocks = false;
        int error;
 
-       lck_mtx_lock(&hfsmp->hfs_mutex);
+       HFS_MOUNT_LOCK(hfsmp, TRUE);    
        if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) {
-               lck_mtx_unlock(&hfsmp->hfs_mutex);
+               HFS_MOUNT_UNLOCK(hfsmp, TRUE);  
                return (EALREADY);
        }
        hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS;
        hfsmp->hfs_resize_filesmoved = 0;
        hfsmp->hfs_resize_totalfiles = 0;
-       lck_mtx_unlock(&hfsmp->hfs_mutex);
+       HFS_MOUNT_UNLOCK(hfsmp, TRUE);  
 
        /*
         * - Journaled HFS Plus volumes only.
@@ -3828,18 +3830,23 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
        newblkcnt = newsize / hfsmp->blockSize;
        reclaimblks = hfsmp->totalBlocks - newblkcnt;
 
+       if (hfs_resize_debug) {
+               printf ("hfs_truncatefs: old: size=%qu, blkcnt=%u, freeblks=%u\n", oldsize, hfsmp->totalBlocks, hfs_freeblks(hfsmp, 1));
+               printf ("hfs_truncatefs: new: size=%qu, blkcnt=%u, reclaimblks=%u\n", newsize, newblkcnt, reclaimblks);
+       }
+
        /* Make sure new size is valid. */
        if ((newsize < HFS_MIN_SIZE) ||
            (newsize >= oldsize) ||
            (newsize % hfsmp->hfs_logical_block_size) ||
            (newsize % hfsmp->hfs_physical_block_size)) {
-               printf ("hfs_truncatefs: invalid size\n");
+               printf ("hfs_truncatefs: invalid size (newsize=%qu, oldsize=%qu)\n", newsize, oldsize);
                error = EINVAL;
                goto out;
        }
-       /* Make sure there's enough space to work with. */
+       /* Make sure that the file system has enough free blocks reclaim */
        if (reclaimblks >= hfs_freeblks(hfsmp, 1)) {
-               printf("hfs_truncatefs: insufficient space (need %u blocks; have %u blocks)\n", reclaimblks, hfs_freeblks(hfsmp, 1));
+               printf("hfs_truncatefs: insufficient space (need %u blocks; have %u free blocks)\n", reclaimblks, hfs_freeblks(hfsmp, 1));
                error = ENOSPC;
                goto out;
        }
@@ -3862,17 +3869,21 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
         * in the allocation blocks beyond (i.e. the blocks we're trying to
         * truncate away.
         */
-       lck_mtx_lock(&hfsmp->hfs_mutex);
+       HFS_MOUNT_LOCK(hfsmp, TRUE);    
        if (hfsmp->blockSize == 512) 
                hfsmp->allocLimit = newblkcnt - 2;
        else
                hfsmp->allocLimit = newblkcnt - 1;
+       /* Update the volume free block count to reflect the total number of 
+        * free blocks that will exist after a successful resize.
+        */
        hfsmp->freeBlocks -= reclaimblks;
-       lck_mtx_unlock(&hfsmp->hfs_mutex);
-       
+       updateFreeBlocks = true;
+       HFS_MOUNT_UNLOCK(hfsmp, TRUE);  
+
        /*
         * Look for files that have blocks at or beyond the location of the
-        * new alternate volume header.
+        * new alternate volume header
         */
        if (hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks)) {
                /*
@@ -3883,8 +3894,9 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
                transaction_begun = 0;
 
                /* Attempt to reclaim some space. */ 
-               if (hfs_reclaimspace(hfsmp, hfsmp->allocLimit, reclaimblks, context) != 0) {
-                       printf("hfs_truncatefs: couldn't reclaim space on %s\n", hfsmp->vcbVN);
+               error = hfs_reclaimspace(hfsmp, hfsmp->allocLimit, reclaimblks, context);
+               if (error != 0) {
+                       printf("hfs_truncatefs: couldn't reclaim space on %s (error=%d)\n", hfsmp->vcbVN, error);
                        error = ENOSPC;
                        goto out;
                }
@@ -3895,8 +3907,9 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
                transaction_begun = 1;
                
                /* Check if we're clear now. */
-               if (hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks)) {
-                       printf("hfs_truncatefs: didn't reclaim enough space on %s\n", hfsmp->vcbVN);
+               error = hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks);
+               if (error != 0) {
+                       printf("hfs_truncatefs: didn't reclaim enough space on %s (error=%d)\n", hfsmp->vcbVN, error);
                        error = EAGAIN;  /* tell client to try again */
                        goto out;
                }
@@ -3933,14 +3946,16 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
         * since this block will be outside of the truncated file system!
         */
        if (hfsmp->hfs_alt_id_sector) {
-               if (buf_meta_bread(hfsmp->hfs_devvp, 
+               error = buf_meta_bread(hfsmp->hfs_devvp, 
                                HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
-                               hfsmp->hfs_physical_block_size, NOCRED, &bp) == 0) {
-       
+                               hfsmp->hfs_physical_block_size, NOCRED, &bp);
+               if (error == 0) {
                        bzero((void*)((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size)), kMDBSize);
                        (void) VNOP_BWRITE(bp);
-               } else if (bp) {
-                       buf_brelse(bp);
+               } else {
+                       if (bp) {
+                               buf_brelse(bp);
+                       }
                }
                bp = NULL;
        }
@@ -3963,7 +3978,7 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
        /*
         * TODO: Adjust the size of the metadata zone based on new volume size?
         */
-        
+       
        /*
         * Adjust the size of hfsmp->hfs_attrdata_vp
         */
@@ -3985,15 +4000,14 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
        }
        
 out:
-       if (error)
-               hfsmp->freeBlocks += reclaimblks;
-       
        lck_mtx_lock(&hfsmp->hfs_mutex);
+       if (error && (updateFreeBlocks == true)) 
+               hfsmp->freeBlocks += reclaimblks;
        hfsmp->allocLimit = hfsmp->totalBlocks;
        if (hfsmp->nextAllocation >= hfsmp->allocLimit)
                hfsmp->nextAllocation = hfsmp->hfs_metazone_end + 1;
        hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS;
-       lck_mtx_unlock(&hfsmp->hfs_mutex);
+       HFS_MOUNT_UNLOCK(hfsmp, TRUE);  
        
        if (lockflags) {
                hfs_systemfile_unlock(hfsmp, lockflags);
@@ -4001,6 +4015,8 @@ out:
        if (transaction_begun) {
                hfs_end_transaction(hfsmp);
                hfs_journal_flush(hfsmp);
+               /* Just to be sure, sync all data to the disk */
+               (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
        }
 
        return (error);
@@ -4077,18 +4093,6 @@ hfs_copy_extent(
        if (cp != hfsmp->hfs_allocation_cp && cp->c_lockowner != current_thread())
                panic("hfs_copy_extent: vp=%p (cp=%p) not owned?\n", vp, cp);
 
-       /*
-        * Wait for any in-progress writes to this vnode to complete, so that we'll
-        * be copying consistent bits.  (Otherwise, it's possible that an async
-        * write will complete to the old extent after we read from it.  That
-        * could lead to corruption.)
-        */
-       err = vnode_waitforwrites(vp, 0, 0, 0, "hfs_copy_extent");
-       if (err) {
-               printf("hfs_copy_extent: Error %d from vnode_waitforwrites\n", err);
-               return err;
-       }
-       
        /*
         * Determine the I/O size to use
         *
@@ -4134,7 +4138,7 @@ hfs_copy_extent(
                buf_setcount(bp, ioSize);
                buf_setblkno(bp, destSector);
                buf_setlblkno(bp, destSector);
-               if (journal_uses_fua(hfsmp->jnl))
+               if (vnode_issystem(vp) && journal_uses_fua(hfsmp->jnl))
                        buf_markfua(bp);
                        
                /* Do the write */
@@ -4157,7 +4161,7 @@ hfs_copy_extent(
                kmem_free(kernel_map, (vm_offset_t)buffer, bufferSize);
 
        /* Make sure all writes have been flushed to disk. */
-       if (!journal_uses_fua(hfsmp->jnl)) {
+       if (vnode_issystem(vp) && !journal_uses_fua(hfsmp->jnl)) {
                err = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
                if (err) {
                        printf("hfs_copy_extent: DKIOCSYNCHRONIZECACHE failed (%d)\n", err);
@@ -4172,8 +4176,15 @@ hfs_copy_extent(
 }
 
 
+static int
+hfs_relocate_callback(__unused HFSPlusExtentKey *key, HFSPlusExtentRecord *record, HFSPlusExtentRecord *state)
+{
+       bcopy(state, record, sizeof(HFSPlusExtentRecord));
+       return 0;
+}
+
 /*
- * Reclaim space at the end of a volume, used by a given system file.
+ * Reclaim space at the end of a volume, used by a given file.
  *
  * This routine attempts to move any extent which contains allocation blocks
  * at or after "startblk."  A separate transaction is used to do the move.
@@ -4182,109 +4193,191 @@ hfs_copy_extent(
  * of a transaction have their physical block numbers invalidated so they will
  * eventually be written to their new locations.
  *
- * This routine can be used to move overflow extents for the allocation file.
- *
  * Inputs:
  *    hfsmp       The volume being resized.
  *    startblk    Blocks >= this allocation block need to be moved.
  *    locks       Which locks need to be taken for the given system file.
  *    vp          The vnode for the system file.
  *
+ *    The caller of this function, hfs_reclaimspace(), grabs cnode lock 
+ *    for non-system files before calling this function.  
+ *
  * Outputs:
- *    moved       Set to true if any extents were moved.
+ *    blks_moved  Total number of allocation blocks moved by this routine.
  */
 static int
-hfs_relocate_callback(__unused HFSPlusExtentKey *key, HFSPlusExtentRecord *record, HFSPlusExtentRecord *state)
-{
-       bcopy(state, record, sizeof(HFSPlusExtentRecord));
-       return 0;
-}
-static int
-hfs_reclaim_sys_file(struct hfsmount *hfsmp, struct vnode *vp, u_long startblk, int locks, Boolean *moved, vfs_context_t context)
+hfs_reclaim_file(struct hfsmount *hfsmp, struct vnode *vp, u_long startblk, int locks, u_int32_t *blks_moved, vfs_context_t context)
 {
        int error;
        int lockflags;
        int i;
        u_long datablks;
-       u_long block;
+       u_long end_block;
        u_int32_t oldStartBlock;
        u_int32_t newStartBlock;
-       u_int32_t blockCount;
+       u_int32_t oldBlockCount;
+       u_int32_t newBlockCount;
        struct filefork *fp;
        struct filefork *fp;
-
+       struct cnode *cp;
+       int is_sysfile;
+       int took_truncate_lock = 0;
+       struct BTreeIterator *iterator = NULL;
+       u_int8_t forktype;
+       u_int32_t fileID;
+               
        /* If there is no vnode for this file, then there's nothing to do. */   
        if (vp == NULL)
                return 0;
 
        /* If there is no vnode for this file, then there's nothing to do. */   
        if (vp == NULL)
                return 0;
 
-       /* printf("hfs_reclaim_sys_file: %.*s\n", VTOC(vp)->c_desc.cd_namelen, VTOC(vp)->c_desc.cd_nameptr); */
+       cp = VTOC(vp);
+       fileID = cp->c_cnid;
+       is_sysfile = vnode_issystem(vp);
+       forktype = VNODE_IS_RSRC(vp) ? 0xFF : 0;
+
+       /* Flush all the buffer cache blocks and cluster pages associated with 
+        * this vnode.  
+        *
+        * If the current vnode is a system vnode, all the buffer cache blocks 
+        * associated with it should already be sync'ed to the disk as part of 
+        * journal flush in hfs_truncatefs().  Normally there should not be 
+        * buffer cache blocks for regular files, but for objects like symlinks,
+        * we can have buffer cache blocks associated with the vnode.  Therefore
+        * we call buf_flushdirtyblks() always.  Resource fork data for directory 
+        * hard links are directly written using buffer cache for device vnode, 
+        * which should also be sync'ed as part of journal flush in hfs_truncatefs().
+        * 
+        * Flushing cluster pages should be the normal case for regular files, 
+        * and really should not do anything for system files.  But just to be 
+        * sure that all blocks associated with this vnode is sync'ed to the 
+        * disk, we call both buffer cache and cluster layer functions.  
+        */
+       buf_flushdirtyblks(vp, MNT_NOWAIT, 0, "hfs_reclaim_file");
        
+       if (!is_sysfile) {
+               /* The caller grabs cnode lock for non-system files only, therefore 
+                * we unlock only non-system files before calling cluster layer.
+                */
+               hfs_unlock(cp);
+               hfs_lock_truncate(cp, TRUE);
+               took_truncate_lock = 1;
+       }
+       (void) cluster_push(vp, 0);
+       if (!is_sysfile) {
+               error = hfs_lock(cp, HFS_FORCE_LOCK);
+               if (error) {
+                       hfs_unlock_truncate(cp, TRUE);
+                       return error;
+               }
+
+               /* If the file no longer exists, nothing left to do */
+               if (cp->c_flag & C_NOEXISTS) {
+                       hfs_unlock_truncate(cp, TRUE);
+                       return 0;
+               }
+       }
+
+       /* Wait for any in-progress writes to this vnode to complete, so that we'll
+        * be copying consistent bits.  (Otherwise, it's possible that an async
+        * write will complete to the old extent after we read from it.  That
+        * could lead to corruption.)
+        */
+       error = vnode_waitforwrites(vp, 0, 0, 0, "hfs_reclaim_file");
+       if (error) {
+               printf("hfs_reclaim_file: Error %d from vnode_waitforwrites\n", error);
+               return error;
+       }
+
+       if (hfs_resize_debug) {
+               printf("hfs_reclaim_file: Start relocating %sfork for fileid=%u name=%.*s\n", (forktype ? "rsrc" : "data"), fileID, cp->c_desc.cd_namelen, cp->c_desc.cd_nameptr);
+       }
+
        /* We always need the allocation bitmap and extents B-tree */
        locks |= SFL_BITMAP | SFL_EXTENTS;
        
        error = hfs_start_transaction(hfsmp);
        if (error) {
-               printf("hfs_reclaim_sys_file: hfs_start_transaction returned %d\n", error);
+               printf("hfs_reclaim_file: hfs_start_transaction returned %d\n", error);
+               if (took_truncate_lock) {
+                       hfs_unlock_truncate(cp, TRUE);
+               }
                return error;
        }
        lockflags = hfs_systemfile_lock(hfsmp, locks, HFS_EXCLUSIVE_LOCK);
        fp = VTOF(vp);
        datablks = 0;
+       *blks_moved = 0;
 
        /* Relocate non-overflow extents */
        for (i = 0; i < kHFSPlusExtentDensity; ++i) {
                if (fp->ff_extents[i].blockCount == 0)
                        break;
                oldStartBlock = fp->ff_extents[i].startBlock;
-               blockCount = fp->ff_extents[i].blockCount;
-               datablks += blockCount;
-               block = oldStartBlock + blockCount;
-               if (block > startblk) {
-                       error = BlockAllocate(hfsmp, 1, blockCount, blockCount, true, true, &newStartBlock, &blockCount);
+               oldBlockCount = fp->ff_extents[i].blockCount;
+               datablks += oldBlockCount;
+               end_block = oldStartBlock + oldBlockCount;
+               /* Check if the file overlaps the target space */
+               if (end_block > startblk) {
+                       /* Allocate a new extent */
+                       error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, true, (is_sysfile ? true : false), &newStartBlock, &newBlockCount);
                        if (error) {
-                               printf("hfs_reclaim_sys_file: BlockAllocate returned %d\n", error);
+                               printf("hfs_reclaim_file: BlockAllocate (error=%d) for fileID=%u %u:(%u,%u)\n", error, fileID, i, oldStartBlock, oldBlockCount);
                                goto fail;
                        }
-                       if (blockCount != fp->ff_extents[i].blockCount) {
-                               printf("hfs_reclaim_sys_file: new blockCount=%u, original blockCount=%u", blockCount, fp->ff_extents[i].blockCount);
-                               goto free_fail;
+                       if (newBlockCount != oldBlockCount) {
+                               printf("hfs_reclaim_file: fileID=%u - newBlockCount=%u, oldBlockCount=%u", fileID, newBlockCount, oldBlockCount);
+                               if (BlockDeallocate(hfsmp, newStartBlock, newBlockCount)) {
+                                       hfs_mark_volume_inconsistent(hfsmp);
+                               }
+                               goto fail;
                        }
-                       error = hfs_copy_extent(hfsmp, vp, oldStartBlock, newStartBlock, blockCount, context);
+
+                       /* Copy data from old location to new location */
+                       error = hfs_copy_extent(hfsmp, vp, oldStartBlock, newStartBlock, newBlockCount, context);
                        if (error) {
-                               printf("hfs_reclaim_sys_file: hfs_copy_extent returned %d\n", error);
-                               goto free_fail;
+                               printf("hfs_reclaim_file: hfs_copy_extent error=%d for fileID=%u %u:(%u,%u) to %u:(%u,%u)\n", error, fileID, i, oldStartBlock, oldBlockCount, i, newStartBlock, newBlockCount);
+                               if (BlockDeallocate(hfsmp, newStartBlock, newBlockCount)) {
+                                       hfs_mark_volume_inconsistent(hfsmp);
+                               }
+                               goto fail;
                        }
                        fp->ff_extents[i].startBlock = newStartBlock;
-                       VTOC(vp)->c_flag |= C_MODIFIED;
-                       *moved = true;
-                       error = BlockDeallocate(hfsmp, oldStartBlock, blockCount);
+                       cp->c_flag |= C_MODIFIED;
+                       *blks_moved += newBlockCount;
+
+                       /* Deallocate the old extent */
+                       error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount);
                        if (error) {
-                               /* TODO: Mark volume inconsistent? */
-                               printf("hfs_reclaim_sys_file: BlockDeallocate returned %d\n", error);
+                               printf("hfs_reclaim_file: BlockDeallocate returned %d\n", error);
+                               hfs_mark_volume_inconsistent(hfsmp);
                                goto fail;
                        }
-                       error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
-                       if (error) {
-                               /* TODO: Mark volume inconsistent? */
-                               printf("hfs_reclaim_sys_file: hfs_flushvolumeheader returned %d\n", error);
-                               goto fail;
+
+                       /* If this is a system file, sync the volume header on disk */
+                       if (is_sysfile) {
+                               error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
+                               if (error) {
+                                       printf("hfs_reclaim_file: hfs_flushvolumeheader returned %d\n", error);
+                                       hfs_mark_volume_inconsistent(hfsmp);
+                                       goto fail;
+                               }
+                       }
+
+                       if (hfs_resize_debug) {
+                               printf ("hfs_reclaim_file: Relocated %u:(%u,%u) to %u:(%u,%u)\n", i, oldStartBlock, oldBlockCount, i, newStartBlock, newBlockCount);
                        }
                }
        }
 
        /* Relocate overflow extents (if any) */
        if (i == kHFSPlusExtentDensity && fp->ff_blocks > datablks) {
-               struct BTreeIterator *iterator = NULL;
                struct FSBufferDescriptor btdata;
                HFSPlusExtentRecord record;
                HFSPlusExtentKey *key;
                FCB *fcb;
-               u_int32_t fileID;
-               u_int8_t forktype;
+               int overflow_count = 0;
 
-               forktype = VNODE_IS_RSRC(vp) ? 0xFF : 0;
-               fileID = VTOC(vp)->c_cnid;
                if (kmem_alloc(kernel_map, (vm_offset_t*) &iterator, sizeof(*iterator))) {
-                       printf("hfs_reclaim_sys_file: kmem_alloc failed!\n");
+                       printf("hfs_reclaim_file: kmem_alloc failed!\n");
                        error = ENOMEM;
                        goto fail;
                }
@@ -4305,40 +4398,51 @@ hfs_reclaim_sys_file(struct hfsmount *hfsmp, struct vnode *vp, u_long startblk,
                error = BTSearchRecord(fcb, iterator, &btdata, NULL, iterator);
                while (error == 0) {
                        /* Stop when we encounter a different file or fork. */
-                       if ((key->fileID != fileID) ||
-                               (key->forkType != forktype)) {
+                       if ((key->fileID != fileID) || 
+                           (key->forkType != forktype)) {
                                break;
                        }
+               
+                       /* Just track the overflow extent record number for debugging... */
+                       if (hfs_resize_debug) {
+                               overflow_count++;
+                       }
+
                        /* 
                         * Check if the file overlaps target space.
                         */
                        for (i = 0; i < kHFSPlusExtentDensity; ++i) {
                                if (record[i].blockCount == 0) {
-                                       goto overflow_done;
+                                       goto fail;
                                }
                                oldStartBlock = record[i].startBlock;
-                               blockCount = record[i].blockCount;
-                               block = oldStartBlock + blockCount;
-                               if (block > startblk) {
-                                       error = BlockAllocate(hfsmp, 1, blockCount, blockCount, true, true, &newStartBlock, &blockCount);
+                               oldBlockCount = record[i].blockCount;
+                               end_block = oldStartBlock + oldBlockCount;
+                               if (end_block > startblk) {
+                                       error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, true, (is_sysfile ? true : false), &newStartBlock, &newBlockCount);
                                        if (error) {
-                                               printf("hfs_reclaim_sys_file: BlockAllocate returned %d\n", error);
-                                               goto overflow_done;
+                                               printf("hfs_reclaim_file: BlockAllocate (error=%d) for fileID=%u %u:(%u,%u)\n", error, fileID, i, oldStartBlock, oldBlockCount);
+                                               goto fail;
                                        }
-                                       if (blockCount != record[i].blockCount) {
-                                               printf("hfs_reclaim_sys_file: new blockCount=%u, original blockCount=%u", blockCount, fp->ff_extents[i].blockCount);
-                                               kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
-                                               goto free_fail;
+                                       if (newBlockCount != oldBlockCount) {
+                                               printf("hfs_reclaim_file: fileID=%u - newBlockCount=%u, oldBlockCount=%u", fileID, newBlockCount, oldBlockCount);
+                                               if (BlockDeallocate(hfsmp, newStartBlock, newBlockCount)) {
+                                                       hfs_mark_volume_inconsistent(hfsmp);
+                                               }
+                                               goto fail;
                                        }
-                                       error = hfs_copy_extent(hfsmp, vp, oldStartBlock, newStartBlock, blockCount, context);
+                                       error = hfs_copy_extent(hfsmp, vp, oldStartBlock, newStartBlock, newBlockCount, context);
                                        if (error) {
-                                               printf("hfs_reclaim_sys_file: hfs_copy_extent returned %d\n", error);
-                                               kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
-                                               goto free_fail;
+                                               printf("hfs_reclaim_file: hfs_copy_extent error=%d for fileID=%u (%u,%u) to (%u,%u)\n", error, fileID, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount);
+                                               if (BlockDeallocate(hfsmp, newStartBlock, newBlockCount)) {
+                                                       hfs_mark_volume_inconsistent(hfsmp);
+                                               }
+                                               goto fail;
                                        }
                                        record[i].startBlock = newStartBlock;
                                        }
                                        record[i].startBlock = newStartBlock;
-                                       VTOC(vp)->c_flag |= C_MODIFIED;
-                                       *moved = true;
+                                       cp->c_flag |= C_MODIFIED;
+                                       *blks_moved += newBlockCount;
+
                                        /*
                                         * NOTE: To support relocating overflow extents of the
                                         * allocation file, we must update the BTree record BEFORE
                                        /*
                                         * NOTE: To support relocating overflow extents of the
                                         * allocation file, we must update the BTree record BEFORE
@@ -4349,15 +4453,18 @@ hfs_reclaim_sys_file(struct hfsmount *hfsmp, struct vnode *vp, u_long startblk,
                                         */
                                        error = BTUpdateRecord(fcb, iterator, (IterateCallBackProcPtr) hfs_relocate_callback, &record);
                                        if (error) {
                                         */
                                        error = BTUpdateRecord(fcb, iterator, (IterateCallBackProcPtr) hfs_relocate_callback, &record);
                                        if (error) {
-                                               /* TODO: Mark volume inconsistent? */
-                                               printf("hfs_reclaim_sys_file: BTUpdateRecord returned %d\n", error);
-                                               goto overflow_done;
+                                               printf("hfs_reclaim_file: BTUpdateRecord returned %d\n", error);
+                                               hfs_mark_volume_inconsistent(hfsmp);
+                                               goto fail;
                                        }
                                        }
-                                       error = BlockDeallocate(hfsmp, oldStartBlock, blockCount);
+                                       error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount);
                                        if (error) {
                                        if (error) {
-                                               /* TODO: Mark volume inconsistent? */
-                                               printf("hfs_reclaim_sys_file: BlockDeallocate returned %d\n", error);
-                                               goto overflow_done;
+                                               printf("hfs_reclaim_file: BlockDeallocate returned %d\n", error);
+                                               hfs_mark_volume_inconsistent(hfsmp);
+                                               goto fail;
+                                       }
+                                       if (hfs_resize_debug) {
+                                               printf ("hfs_reclaim_file: Relocated overflow#%d %u:(%u,%u) to %u:(%u,%u)\n", overflow_count, i, oldStartBlock, oldBlockCount, i, newStartBlock, newBlockCount);
                                        }
                                }
                        }
                                        }
                                }
                        }
@@ -4368,26 +4475,29 @@ hfs_reclaim_sys_file(struct hfsmount *hfsmp, struct vnode *vp, u_long startblk,
                                break;
                        }
                }
                                break;
                        }
                }
-overflow_done:
-               kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
-               if (error) {
-                       goto fail;
-               }
        }
        
        }
        
-       hfs_systemfile_unlock(hfsmp, lockflags);
-       error = hfs_end_transaction(hfsmp);
-       if (error) {
-               printf("hfs_reclaim_sys_file: hfs_end_transaction returned %d\n", error);
+fail:
+       if (iterator) {
+               kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
        }
 
        }
 
-       return error;
-
-free_fail:
-       (void) BlockDeallocate(hfsmp, newStartBlock, blockCount);
-fail:
        (void) hfs_systemfile_unlock(hfsmp, lockflags);
        (void) hfs_systemfile_unlock(hfsmp, lockflags);
+
+       if ((*blks_moved != 0) && (is_sysfile == false)) {
+               (void) hfs_update(vp, MNT_WAIT);
+       }
+
        (void) hfs_end_transaction(hfsmp);
        (void) hfs_end_transaction(hfsmp);
+
+       if (took_truncate_lock) {
+               hfs_unlock_truncate(cp, TRUE);
+       }
+
+       if (hfs_resize_debug) {
+               printf("hfs_reclaim_file: Finished relocating %sfork for fileid=%u (error=%d)\n", (forktype ? "rsrc" : "data"), fileID, error);
+       }
+
        return error;
 }
 
        return error;
 }
 
@@ -4453,6 +4563,7 @@ hfs_reclaim_journal_file(struct hfsmount *hfsmp, vfs_context_t context)
 {
        int error;
        int lockflags;
 {
        int error;
        int lockflags;
+       u_int32_t oldStartBlock;
        u_int32_t newStartBlock;
        u_int32_t oldBlockCount;
        u_int32_t newBlockCount;
        u_int32_t newStartBlock;
        u_int32_t oldBlockCount;
        u_int32_t newBlockCount;
@@ -4493,6 +4604,7 @@ hfs_reclaim_journal_file(struct hfsmount *hfsmp, vfs_context_t context)
                printf("hfs_reclaim_journal_file: cat_idlookup returned %d\n", error);
                goto free_fail;
        }
                printf("hfs_reclaim_journal_file: cat_idlookup returned %d\n", error);
                goto free_fail;
        }
+       oldStartBlock = journal_fork.cf_extents[0].startBlock;
        journal_fork.cf_size = newBlockCount * hfsmp->blockSize;
        journal_fork.cf_extents[0].startBlock = newStartBlock;
        journal_fork.cf_extents[0].blockCount = newBlockCount;
        journal_fork.cf_size = newBlockCount * hfsmp->blockSize;
        journal_fork.cf_extents[0].startBlock = newStartBlock;
        journal_fork.cf_extents[0].blockCount = newBlockCount;
@@ -4524,6 +4636,9 @@ hfs_reclaim_journal_file(struct hfsmount *hfsmp, vfs_context_t context)
                printf("hfs_reclaim_journal_file: hfs_end_transaction returned %d\n", error);
        }
        
                printf("hfs_reclaim_journal_file: hfs_end_transaction returned %d\n", error);
        }
        
+       if (!error && hfs_resize_debug) {
+               printf ("hfs_reclaim_journal_file: Successfully relocated journal from (%u,%u) to (%u,%u)\n", oldStartBlock, oldBlockCount, newStartBlock, newBlockCount);
+       }
        return error;
 
 free_fail:
        return error;
 
 free_fail:
@@ -4531,6 +4646,9 @@ free_fail:
 fail:
        hfs_systemfile_unlock(hfsmp, lockflags);
        (void) hfs_end_transaction(hfsmp);
 fail:
        hfs_systemfile_unlock(hfsmp, lockflags);
        (void) hfs_end_transaction(hfsmp);
+       if (hfs_resize_debug) {
+               printf ("hfs_reclaim_journal_file: Error relocating journal file (error=%d)\n", error);
+       }
        return error;
 }
 
        return error;
 }
 
@@ -4545,6 +4663,7 @@ hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, vfs_context_t context)
 {
        int error;
        int lockflags;
 {
        int error;
        int lockflags;
+       u_int32_t oldBlock;
        u_int32_t newBlock;
        u_int32_t blockCount;
        struct cat_desc jib_desc;
        u_int32_t newBlock;
        u_int32_t blockCount;
        struct cat_desc jib_desc;
@@ -4608,6 +4727,7 @@ hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, vfs_context_t context)
                printf("hfs_reclaim_journal_file: cat_idlookup returned %d\n", error);
                goto fail;
        }
                printf("hfs_reclaim_journal_file: cat_idlookup returned %d\n", error);
                goto fail;
        }
+       oldBlock = jib_fork.cf_extents[0].startBlock;
        jib_fork.cf_size = hfsmp->blockSize;
        jib_fork.cf_extents[0].startBlock = newBlock;
        jib_fork.cf_extents[0].blockCount = 1;
        jib_fork.cf_size = hfsmp->blockSize;
        jib_fork.cf_extents[0].startBlock = newBlock;
        jib_fork.cf_extents[0].blockCount = 1;
@@ -4635,6 +4755,10 @@ hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, vfs_context_t context)
        if (error) {
                printf("hfs_reclaim_journal_info_block: journal_flush returned %d\n", error);
        }
        if (error) {
                printf("hfs_reclaim_journal_info_block: journal_flush returned %d\n", error);
        }
+
+       if (!error && hfs_resize_debug) {
+               printf ("hfs_reclaim_journal_info_block: Successfully relocated journal info block from (%u,%u) to (%u,%u)\n", oldBlock, blockCount, newBlock, blockCount);
+       }
        return error;
 
 free_fail:
        return error;
 
 free_fail:
@@ -4642,12 +4766,19 @@ free_fail:
 fail:
        hfs_systemfile_unlock(hfsmp, lockflags);
        (void) hfs_end_transaction(hfsmp);
 fail:
        hfs_systemfile_unlock(hfsmp, lockflags);
        (void) hfs_end_transaction(hfsmp);
+       if (hfs_resize_debug) {
+               printf ("hfs_reclaim_journal_info_block: Error relocating journal info block (error=%d)\n", error);
+       }
        return error;
 }
 
 
 /*
  * Reclaim space at the end of a file system.
        return error;
 }
 
 
 /*
  * Reclaim space at the end of a file system.
+ *
+ * Inputs - 
+ *     startblk        - start block of the space being reclaimed
+ *     reclaimblks     - number of allocation blocks to reclaim
  */
 static int
 hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t reclaimblks, vfs_context_t context)
  */
 static int
 hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t reclaimblks, vfs_context_t context)
@@ -4663,45 +4794,53 @@ hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t reclaimbl
        int filecnt = 0;
        int maxfilecnt;
        u_int32_t block;
        int filecnt = 0;
        int maxfilecnt;
        u_int32_t block;
-       u_int32_t datablks;
-       u_int32_t rsrcblks;
-       u_int32_t blkstomove = 0;
        int lockflags;
        int lockflags;
-       int i;
+       int i, j;
        int error;
        int lastprogress = 0;
        int error;
        int lastprogress = 0;
-       Boolean system_file_moved = false;
+       u_int32_t blks_moved = 0;
+       u_int32_t total_blks_moved = 0;
+       Boolean need_relocate;
 
        /* Relocate extents of the Allocation file if they're in the way. */
 
        /* Relocate extents of the Allocation file if they're in the way. */
-       error = hfs_reclaim_sys_file(hfsmp, hfsmp->hfs_allocation_vp, startblk, SFL_BITMAP, &system_file_moved, context);
+       error = hfs_reclaim_file(hfsmp, hfsmp->hfs_allocation_vp, startblk, SFL_BITMAP, &blks_moved, context);
        if (error) {
                printf("hfs_reclaimspace: reclaim allocation file returned %d\n", error);
                return error;
        }
        if (error) {
                printf("hfs_reclaimspace: reclaim allocation file returned %d\n", error);
                return error;
        }
+       total_blks_moved += blks_moved;
+
        /* Relocate extents of the Extents B-tree if they're in the way. */
        /* Relocate extents of the Extents B-tree if they're in the way. */
-       error = hfs_reclaim_sys_file(hfsmp, hfsmp->hfs_extents_vp, startblk, SFL_EXTENTS, &system_file_moved, context);
+       error = hfs_reclaim_file(hfsmp, hfsmp->hfs_extents_vp, startblk, SFL_EXTENTS, &blks_moved, context);
        if (error) {
                printf("hfs_reclaimspace: reclaim extents b-tree returned %d\n", error);
                return error;
        }
        if (error) {
                printf("hfs_reclaimspace: reclaim extents b-tree returned %d\n", error);
                return error;
        }
+       total_blks_moved += blks_moved;
+
        /* Relocate extents of the Catalog B-tree if they're in the way. */
        /* Relocate extents of the Catalog B-tree if they're in the way. */
-       error = hfs_reclaim_sys_file(hfsmp, hfsmp->hfs_catalog_vp, startblk, SFL_CATALOG, &system_file_moved, context);
+       error = hfs_reclaim_file(hfsmp, hfsmp->hfs_catalog_vp, startblk, SFL_CATALOG, &blks_moved, context);
        if (error) {
                printf("hfs_reclaimspace: reclaim catalog b-tree returned %d\n", error);
                return error;
        }
        if (error) {
                printf("hfs_reclaimspace: reclaim catalog b-tree returned %d\n", error);
                return error;
        }
+       total_blks_moved += blks_moved;
+
        /* Relocate extents of the Attributes B-tree if they're in the way. */
        /* Relocate extents of the Attributes B-tree if they're in the way. */
-       error = hfs_reclaim_sys_file(hfsmp, hfsmp->hfs_attribute_vp, startblk, SFL_ATTRIBUTE, &system_file_moved, context);
+       error = hfs_reclaim_file(hfsmp, hfsmp->hfs_attribute_vp, startblk, SFL_ATTRIBUTE, &blks_moved, context);
        if (error) {
                printf("hfs_reclaimspace: reclaim attribute b-tree returned %d\n", error);
                return error;
        }
        if (error) {
                printf("hfs_reclaimspace: reclaim attribute b-tree returned %d\n", error);
                return error;
        }
+       total_blks_moved += blks_moved;
+
        /* Relocate extents of the Startup File if there is one and they're in the way. */
        /* Relocate extents of the Startup File if there is one and they're in the way. */
-       error = hfs_reclaim_sys_file(hfsmp, hfsmp->hfs_startup_vp, startblk, SFL_STARTUP, &system_file_moved, context);
+       error = hfs_reclaim_file(hfsmp, hfsmp->hfs_startup_vp, startblk, SFL_STARTUP, &blks_moved, context);
        if (error) {
                printf("hfs_reclaimspace: reclaim startup file returned %d\n", error);
                return error;
        }
        if (error) {
                printf("hfs_reclaimspace: reclaim startup file returned %d\n", error);
                return error;
        }
+       total_blks_moved += blks_moved;
        
        /*
         * We need to make sure the alternate volume header gets flushed if we moved
        
        /*
         * We need to make sure the alternate volume header gets flushed if we moved
@@ -4709,12 +4848,13 @@ hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t reclaimbl
         * shrinking the size of the volume, or else the journal code will panic
         * with an invalid (too large) block number.
         *
         * shrinking the size of the volume, or else the journal code will panic
         * with an invalid (too large) block number.
         *
-        * Note that system_file_moved will be set if ANY extent was moved, even
+        * Note that total_blks_moved will be set if ANY extent was moved, even
         * if it was just an overflow extent.  In this case, the journal_flush isn't
         * strictly required, but shouldn't hurt.
         */
         * if it was just an overflow extent.  In this case, the journal_flush isn't
         * strictly required, but shouldn't hurt.
         */
-       if (system_file_moved)
+       if (total_blks_moved) {
                hfs_journal_flush(hfsmp);
                hfs_journal_flush(hfsmp);
+       }
 
        if (hfsmp->jnl_start + (hfsmp->jnl_size / hfsmp->blockSize) > startblk) {
                error = hfs_reclaim_journal_file(hfsmp, context);
 
        if (hfsmp->jnl_start + (hfsmp->jnl_size / hfsmp->blockSize) > startblk) {
                error = hfs_reclaim_journal_file(hfsmp, context);
@@ -4745,6 +4885,7 @@ hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t reclaimbl
        }       
 
        saved_next_allocation = hfsmp->nextAllocation;
        }       
 
        saved_next_allocation = hfsmp->nextAllocation;
+       /* Always try allocating new blocks after the metadata zone */
        HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_start);
 
        fcb = VTOF(hfsmp->hfs_catalog_vp);
        HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_start);
 
        fcb = VTOF(hfsmp->hfs_catalog_vp);
@@ -4763,7 +4904,8 @@ hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t reclaimbl
        }
        /*
         * Iterate over all the catalog records looking for files
        }
        /*
         * Iterate over all the catalog records looking for files
-        * that overlap into the space we're trying to free up.
+        * that overlap into the space we're trying to free up and 
+        * the total number of blocks that will require relocation.
         */
        for (filecnt = 0; filecnt < maxfilecnt; ) {
                error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
         */
        for (filecnt = 0; filecnt < maxfilecnt; ) {
                error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
@@ -4776,58 +4918,64 @@ hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t reclaimbl
                if (filerec.recordType != kHFSPlusFileRecord) {
                        continue;
                }
                if (filerec.recordType != kHFSPlusFileRecord) {
                        continue;
                }
-               datablks = rsrcblks = 0;
-               /* 
-                * Check if either fork overlaps target space.
-                */
+
+               need_relocate = false;
+               /* Check if data fork overlaps the target space */
                for (i = 0; i < kHFSPlusExtentDensity; ++i) {
                for (i = 0; i < kHFSPlusExtentDensity; ++i) {
-                       if (filerec.dataFork.extents[i].blockCount != 0) {
-                               datablks += filerec.dataFork.extents[i].blockCount;
-                               block = filerec.dataFork.extents[i].startBlock +
-                                               filerec.dataFork.extents[i].blockCount;
-                               if (block >= startblk) {
-                                       if ((filerec.fileID == hfsmp->hfs_jnlfileid) ||
-                                               (filerec.fileID == hfsmp->hfs_jnlinfoblkid)) {
-                                               printf("hfs_reclaimspace: cannot move active journal\n");
-                                               error = EPERM;
-                                               goto end_iteration;
-                                       }
-                                       cnidbufp[filecnt++] = filerec.fileID;
-                                       blkstomove += filerec.dataFork.totalBlocks;
-                                       break;
-                               }
+                       if (filerec.dataFork.extents[i].blockCount == 0) {
+                               break;
                        }
                        }
-                       if (filerec.resourceFork.extents[i].blockCount != 0) {
-                               rsrcblks += filerec.resourceFork.extents[i].blockCount;
-                               block = filerec.resourceFork.extents[i].startBlock +
-                                               filerec.resourceFork.extents[i].blockCount;
-                               if (block >= startblk) {
-                                       cnidbufp[filecnt++] = filerec.fileID;
-                                       blkstomove += filerec.resourceFork.totalBlocks;
-                                       break;
+                       block = filerec.dataFork.extents[i].startBlock +
+                               filerec.dataFork.extents[i].blockCount;
+                       if (block >= startblk) {
+                               if ((filerec.fileID == hfsmp->hfs_jnlfileid) ||
+                                   (filerec.fileID == hfsmp->hfs_jnlinfoblkid)) {
+                                       printf("hfs_reclaimspace: cannot move active journal\n");
+                                       error = EPERM;
+                                       goto end_iteration;
                                }
                                }
+                               need_relocate = true;
+                               goto save_fileid;
                        }
                }
                        }
                }
-               /*
-                * Check for any overflow extents that overlap.
-                */
-               if (i == kHFSPlusExtentDensity) {
-                       if (filerec.dataFork.totalBlocks > datablks) {
-                               if (hfs_overlapped_overflow_extents(hfsmp, startblk, datablks, filerec.fileID, 0)) {
-                                       cnidbufp[filecnt++] = filerec.fileID;
-                                       blkstomove += filerec.dataFork.totalBlocks;
-                               }
-                       } else if (filerec.resourceFork.totalBlocks > rsrcblks) {
-                               if (hfs_overlapped_overflow_extents(hfsmp, startblk, rsrcblks, filerec.fileID, 1)) {
-                                       cnidbufp[filecnt++] = filerec.fileID;
-                                       blkstomove += filerec.resourceFork.totalBlocks;
-                               }
+
+               /* Check if resource fork overlaps the target space */
+               for (j = 0; j < kHFSPlusExtentDensity; ++j) {
+                       if (filerec.resourceFork.extents[j].blockCount == 0) {
+                               break;
+                       }
+                       block = filerec.resourceFork.extents[j].startBlock +
+                               filerec.resourceFork.extents[j].blockCount;
+                       if (block >= startblk) {
+                               need_relocate = true;
+                               goto save_fileid;
+                       }
+               }
+
+               /* Check if any forks' overflow extents overlap the target space */
+               if ((i == kHFSPlusExtentDensity) || (j == kHFSPlusExtentDensity)) {
+                       if (hfs_overlapped_overflow_extents(hfsmp, startblk, filerec.fileID)) {
+                               need_relocate = true;
+                               goto save_fileid;
+                       }
+               }
+
+save_fileid:
+               if (need_relocate == true) {
+                       cnidbufp[filecnt++] = filerec.fileID;
+                       if (hfs_resize_debug) {
+                               printf ("hfs_reclaimspace: Will relocate extents for fileID=%u\n", filerec.fileID);
                        }
                }
        }
 
 end_iteration:
                        }
                }
        }
 
 end_iteration:
-       if (filecnt == 0 && !system_file_moved) {
+       /* If no regular file was found to be relocated and 
+        * no system file was moved, we probably do not have 
+        * enough space to relocate the system files, or 
+        * something else went wrong.
+        */
+       if ((filecnt == 0) && (total_blks_moved == 0)) {
                printf("hfs_reclaimspace: no files moved\n");
                error = ENOSPC;
        }
                printf("hfs_reclaimspace: no files moved\n");
                error = ENOSPC;
        }
@@ -4836,66 +4984,52 @@ end_iteration:
        if (error || filecnt == 0)
                goto out;
 
        if (error || filecnt == 0)
                goto out;
 
-       /*
-        * Double check space requirements to make sure
-        * there is enough space to relocate any files
-        * that reside in the reclaim area.
-        *
-        *                                          Blocks To Move --------------
-        *                                                            |    |    |
-        *                                                            V    V    V
-        * ------------------------------------------------------------------------
-        * |                                                        | /   ///  // |
-        * |                                                        | /   ///  // |
-        * |                                                        | /   ///  // |
-        * ------------------------------------------------------------------------
-        *
-        * <------------------- New Total Blocks ------------------><-- Reclaim -->
-        *
-        * <------------------------ Original Total Blocks ----------------------->
-        *
-        */
-       if (blkstomove >= hfs_freeblks(hfsmp, 1)) {
-               printf("hfs_truncatefs: insufficient space (need %u blocks; have %u blocks)\n", blkstomove, hfs_freeblks(hfsmp, 1));
-               error = ENOSPC;
-               goto out;
-       }
        hfsmp->hfs_resize_filesmoved = 0;
        hfsmp->hfs_resize_totalfiles = filecnt;
        
        /* Now move any files that are in the way. */
        for (i = 0; i < filecnt; ++i) {
        hfsmp->hfs_resize_filesmoved = 0;
        hfsmp->hfs_resize_totalfiles = filecnt;
        
        /* Now move any files that are in the way. */
        for (i = 0; i < filecnt; ++i) {
-               struct vnode * rvp;
-        struct cnode * cp;
+               struct vnode *rvp;
+               struct cnode *cp;
+               struct filefork *datafork;
 
                if (hfs_vget(hfsmp, cnidbufp[i], &vp, 0) != 0)
                        continue;
 
                if (hfs_vget(hfsmp, cnidbufp[i], &vp, 0) != 0)
                        continue;
+               
+               cp = VTOC(vp);
+               datafork = VTOF(vp);
 
 
-        /* Relocating directory hard links is not supported, so we
-         * punt (see radar 6217026). */
-        cp = VTOC(vp);
-        if ((cp->c_flag & C_HARDLINK) && vnode_isdir(vp)) {
-            printf("hfs_reclaimspace: unable to relocate directory hard link %d\n", cp->c_cnid);
-            error = EINVAL;
-            goto out;
-        }
-
-               /* Relocate any data fork blocks. */
-               if (VTOF(vp) && VTOF(vp)->ff_blocks > 0) {
-                       error = hfs_relocate(vp, hfsmp->hfs_metazone_end + 1, kauth_cred_get(), current_proc());
+               /* Relocating directory hard links is not supported, so we punt (see radar 6217026). */
+               if ((cp->c_flag & C_HARDLINK) && vnode_isdir(vp)) {
+                       printf("hfs_reclaimspace: Unable to relocate directory hard link id=%d\n", cp->c_cnid);
+                       error = EINVAL;
+                       goto out;
                }
                }
-               if (error) 
-                       break;
 
 
-               /* Relocate any resource fork blocks. */
-               if ((cp->c_blocks - (VTOF(vp) ? VTOF((vp))->ff_blocks : 0)) > 0) {
-                       error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE);
-                       if (error)
+               /* Relocate any overlapping data fork blocks. */
+               if (datafork && datafork->ff_blocks > 0) {
+                       error = hfs_reclaim_file(hfsmp, vp, startblk, 0, &blks_moved, context);
+                       if (error)  {
+                               printf ("hfs_reclaimspace: Error reclaiming datafork blocks of fileid=%u (error=%d)\n", cnidbufp[i], error);
                                break;
                                break;
-                       error = hfs_relocate(rvp, hfsmp->hfs_metazone_end + 1, kauth_cred_get(), current_proc());
+                       }
+                       total_blks_moved += blks_moved;
+               }
+
+               /* Relocate any overlapping resource fork blocks. */
+               if ((cp->c_blocks - (datafork ? datafork->ff_blocks : 0)) > 0) {
+                       error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, TRUE);
+                       if (error) {
+                               printf ("hfs_reclaimspace: Error looking up rvp for fileid=%u (error=%d)\n", cnidbufp[i], error);
+                               break;
+                       }
+                       error = hfs_reclaim_file(hfsmp, rvp, startblk, 0, &blks_moved, context);
                        VTOC(rvp)->c_flag |= C_NEED_RVNODE_PUT;
                        VTOC(rvp)->c_flag |= C_NEED_RVNODE_PUT;
-                       if (error)
+                       if (error) {
+                               printf ("hfs_reclaimspace: Error reclaiming rsrcfork blocks of fileid=%u (error=%d)\n", cnidbufp[i], error);
                                break;
                                break;
+                       }
+                       total_blks_moved += blks_moved;
                }
                hfs_unlock(cp);
                vnode_put(vp);
                }
                hfs_unlock(cp);
                vnode_put(vp);
@@ -4920,8 +5054,8 @@ end_iteration:
                vp = NULL;
        }
        if (hfsmp->hfs_resize_filesmoved != 0) {
                vp = NULL;
        }
        if (hfsmp->hfs_resize_filesmoved != 0) {
-               printf("hfs_reclaimspace: relocated %d files on \"%s\"\n",
-                      (int)hfsmp->hfs_resize_filesmoved, hfsmp->vcbVN);
+               printf("hfs_reclaimspace: relocated %u blocks from %d files on \"%s\"\n",
+                       total_blks_moved, (int)hfsmp->hfs_resize_filesmoved, hfsmp->vcbVN);
        }
 out:
        kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
        }
 out:
        kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
@@ -4939,32 +5073,34 @@ out:
 
 
 /*
 
 
 /*
- * Check if there are any overflow extents that overlap.
+ * Check if there are any overflow data or resource fork extents that overlap 
+ * into the disk space that is being reclaimed.  
+ *
+ * Output - 
+ *     1 - One of the overflow extents need to be relocated
+ *     0 - No overflow extents need to be relocated, or there was an error
  */
 static int
  */
 static int
-hfs_overlapped_overflow_extents(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t catblks, u_int32_t fileID, int rsrcfork)
+hfs_overlapped_overflow_extents(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t fileID)
 {
        struct BTreeIterator * iterator = NULL;
        struct FSBufferDescriptor btdata;
        HFSPlusExtentRecord extrec;
        HFSPlusExtentKey *extkeyptr;
        FCB *fcb;
 {
        struct BTreeIterator * iterator = NULL;
        struct FSBufferDescriptor btdata;
        HFSPlusExtentRecord extrec;
        HFSPlusExtentKey *extkeyptr;
        FCB *fcb;
-       u_int32_t block;
-       u_int8_t forktype;
        int overlapped = 0;
        int i;
        int error;
 
        int overlapped = 0;
        int i;
        int error;
 
-       forktype = rsrcfork ? 0xFF : 0;
        if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
        if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
-               return (0);
+               return 0;
        }       
        bzero(iterator, sizeof(*iterator));
        extkeyptr = (HFSPlusExtentKey *)&iterator->key;
        extkeyptr->keyLength = kHFSPlusExtentKeyMaximumLength;
        }       
        bzero(iterator, sizeof(*iterator));
        extkeyptr = (HFSPlusExtentKey *)&iterator->key;
        extkeyptr->keyLength = kHFSPlusExtentKeyMaximumLength;
-       extkeyptr->forkType = forktype;
+       extkeyptr->forkType = 0;
        extkeyptr->fileID = fileID;
        extkeyptr->fileID = fileID;
-       extkeyptr->startBlock = catblks;
+       extkeyptr->startBlock = 0;
 
        btdata.bufferAddress = &extrec;
        btdata.itemSize = sizeof(extrec);
 
        btdata.bufferAddress = &extrec;
        btdata.itemSize = sizeof(extrec);
@@ -4972,32 +5108,41 @@ hfs_overlapped_overflow_extents(struct hfsmount *hfsmp, u_int32_t startblk, u_in
        
        fcb = VTOF(hfsmp->hfs_extents_vp);
 
        
        fcb = VTOF(hfsmp->hfs_extents_vp);
 
+       /* This will position the iterator just before the first overflow 
+        * extent record for given fileID.  It will always return btNotFound, 
+        * so we special case the error code.
+        */
        error = BTSearchRecord(fcb, iterator, &btdata, NULL, iterator);
        error = BTSearchRecord(fcb, iterator, &btdata, NULL, iterator);
+       if (error && (error != btNotFound)) {
+               goto out;
+       }
+
+       /* BTIterateRecord() might return error if the btree is empty, and 
+        * therefore we return that the extent does not overflow to the caller
+        */
+       error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
        while (error == 0) {
                /* Stop when we encounter a different file. */
        while (error == 0) {
                /* Stop when we encounter a different file. */
-               if ((extkeyptr->fileID != fileID) ||
-                   (extkeyptr->forkType != forktype)) {
+               if (extkeyptr->fileID != fileID) {
                        break;
                }
                        break;
                }
-               /* 
-                * Check if the file overlaps target space.
-                */
+               /* Check if any of the forks exist in the target space. */
                for (i = 0; i < kHFSPlusExtentDensity; ++i) {
                        if (extrec[i].blockCount == 0) {
                                break;
                        }
                for (i = 0; i < kHFSPlusExtentDensity; ++i) {
                        if (extrec[i].blockCount == 0) {
                                break;
                        }
-                       block = extrec[i].startBlock + extrec[i].blockCount;
-                       if (block >= startblk) {
+                       if ((extrec[i].startBlock + extrec[i].blockCount) >= startblk) {
                                overlapped = 1;
                                overlapped = 1;
-                               break;
+                               goto out;
                        }
                }
                /* Look for more records. */
                error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
        }
 
                        }
                }
                /* Look for more records. */
                error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
        }
 
+out:
        kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
        kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
-       return (overlapped);
+       return overlapped;
 }
 
 
 }
 
 
index 2485c73f6a283b7a74e4747954346c3d8b1d274a..307e2db66da929be6ca51322f2d9e242a9726229 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -1493,7 +1493,7 @@ hfs_freeblks(struct hfsmount * hfsmp, int wantreserve)
        /*
         * We don't bother taking the mount lock
         * to look at these values since the values
        /*
         * We don't bother taking the mount lock
         * to look at these values since the values
-        * themselves are each updated automically
+        * themselves are each updated atomically
         * on aligned addresses.
         */
        freeblks = hfsmp->freeBlocks;
         * on aligned addresses.
         */
        freeblks = hfsmp->freeBlocks;
index 9114d0a994b28693a59b356b34c42cef0c2f2676..eda49e24284694d5f7e030bbdde147adc97b5b6f 100644 (file)
@@ -812,8 +812,14 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap)
                                
                                if (cp->c_blocks - VTOF(vp)->ff_blocks) {
                                        /* We deal with rsrc fork vnode iocount at the end of the function */
                                
                                if (cp->c_blocks - VTOF(vp)->ff_blocks) {
                                        /* We deal with rsrc fork vnode iocount at the end of the function */
-                                       error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE);
+                                       error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, TRUE);
                                        if (error) {
                                        if (error) {
+                                               /* 
+                                                * hfs_vgetrsrc may have returned a vnode in rvp even though
+                                                * we got an error, because we specified error_on_unlinked.
+                                                * We need to drop the iocount after we release the cnode lock, so
+                                                * it will be taken care of at the end of the function if it's needed.
+                                                */
                                                goto out;
                                        }
                                        
                                                goto out;
                                        }
                                        
@@ -2263,11 +2269,15 @@ hfs_vnop_remove(ap)
                if ((error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK))) {
                        return (error);
                }
                if ((error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK))) {
                        return (error);
                }
-
-               error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE);
+               error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, TRUE);
                hfs_unlock(cp);
                if (error) {
                hfs_unlock(cp);
                if (error) {
-                       return (error); 
+                       /* We may have gotten a rsrc vp out even though we got an error back. */
+                       if (rvp) {
+                               vnode_put(rvp);
+                               rvp = NULL;
+                       }
+                       return error;
                }
                drop_rsrc_vnode = 1;
        }
                }
                drop_rsrc_vnode = 1;
        }
@@ -2670,10 +2680,17 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
                if (error && error != ENXIO && error != ENOENT && truncated) {
                        if ((cp->c_datafork && cp->c_datafork->ff_size != 0) ||
                                        (cp->c_rsrcfork && cp->c_rsrcfork->ff_size != 0)) {
                if (error && error != ENXIO && error != ENOENT && truncated) {
                        if ((cp->c_datafork && cp->c_datafork->ff_size != 0) ||
                                        (cp->c_rsrcfork && cp->c_rsrcfork->ff_size != 0)) {
+                               off_t data_size = 0;
+                               off_t rsrc_size = 0;
+                               if (cp->c_datafork) {
+                                       data_size = cp->c_datafork->ff_size;
+                               }
+                               if (cp->c_rsrcfork) {
+                                       rsrc_size = cp->c_rsrcfork->ff_size;
+                               }
                                printf("hfs: remove: couldn't delete a truncated file (%s)" 
                                                "(error %d, data sz %lld; rsrc sz %lld)",
                                printf("hfs: remove: couldn't delete a truncated file (%s)" 
                                                "(error %d, data sz %lld; rsrc sz %lld)",
-                                       cp->c_desc.cd_nameptr, error, cp->c_datafork->ff_size, 
-                                       cp->c_rsrcfork->ff_size);
+                                       cp->c_desc.cd_nameptr, error, data_size, rsrc_size);
                                hfs_mark_volume_inconsistent(hfsmp);
                        } else {
                                printf("hfs: remove: strangely enough, deleting truncated file %s (%d) got err %d\n",
                                hfs_mark_volume_inconsistent(hfsmp);
                        } else {
                                printf("hfs: remove: strangely enough, deleting truncated file %s (%d) got err %d\n",
@@ -2850,10 +2867,17 @@ hfs_vnop_rename(ap)
                if ((error = hfs_lock (VTOC(fvp), HFS_EXCLUSIVE_LOCK))) {
                        return (error);
                }
                if ((error = hfs_lock (VTOC(fvp), HFS_EXCLUSIVE_LOCK))) {
                        return (error);
                }
-
-               error = hfs_vgetrsrc(VTOHFS(fvp), fvp, &fvp_rsrc, TRUE);
+               
+               /*
+                * We care if we race against rename/delete with this cnode, so we'll
+                * error out if this file becomes open-unlinked during this call.
+                */
+               error = hfs_vgetrsrc(VTOHFS(fvp), fvp, &fvp_rsrc, TRUE, TRUE);
                hfs_unlock (VTOC(fvp));
                if (error) {
                hfs_unlock (VTOC(fvp));
                if (error) {
+                       if (fvp_rsrc) {
+                               vnode_put (fvp_rsrc);
+                       }
                        return error;
                }
        }
                        return error;
                }
        }
@@ -2865,13 +2889,30 @@ hfs_vnop_rename(ap)
                 * grab the resource fork if the lock succeeded.
                 */
                if (hfs_lock (VTOC(tvp), HFS_EXCLUSIVE_LOCK) == 0) {
                 * grab the resource fork if the lock succeeded.
                 */
                if (hfs_lock (VTOC(tvp), HFS_EXCLUSIVE_LOCK) == 0) {
-                       error = hfs_vgetrsrc(VTOHFS(tvp), tvp, &tvp_rsrc, TRUE);
-                       hfs_unlock (VTOC(tvp));
+                       tcp = VTOC(tvp);
+                       
+                       /* 
+                        * We only care if we get an open-unlinked file on the dst so we 
+                        * know to null out tvp/tcp to make the rename operation act 
+                        * as if they never existed.  Because they're effectively out of the
+                        * namespace already it's fine to do this.  If this is true, then
+                        * make sure to unlock the cnode and drop the iocount only after the unlock.
+                        */
+                       error = hfs_vgetrsrc(VTOHFS(tvp), tvp, &tvp_rsrc, TRUE, TRUE);
+                       hfs_unlock (tcp);
                        if (error) {
                        if (error) {
-                               if (fvp_rsrc) {
-                                       vnode_put (fvp_rsrc);
+                               /*
+                                * Since we specify TRUE for error-on-unlinked in hfs_vgetrsrc,
+                                * we can get a rsrc fork vp even if it returns an error.
+                                */
+                               tcp = NULL;
+                               tvp = NULL;
+                               if (tvp_rsrc) {
+                                       vnode_put (tvp_rsrc);
+                                       tvp_rsrc = NULLVP;
                                }
                                }
-                               return error;
+                               /* just bypass truncate lock and act as if we never got tcp/tvp */
+                               goto retry;
                        }
                }
        }
                        }
                }
        }
@@ -4282,22 +4323,48 @@ exit:
 }
 
 
 }
 
 
-/*
- * Return a referenced vnode for the resource fork
- *
- * cnode for vnode vp must already be locked.
- *
- * can_drop_lock is true if its safe to temporarily drop/re-acquire the cnode lock
+
+/* hfs_vgetrsrc acquires a resource fork vnode corresponding to the cnode that is
+ * found in 'vp'.  The rsrc fork vnode is returned with the cnode locked and iocount
+ * on the rsrc vnode.
+ * 
+ * *rvpp is an output argument for returning the pointer to the resource fork vnode.
+ * In most cases, the resource fork vnode will not be set if we return an error. 
+ * However, if error_on_unlinked is set, we may have already acquired the resource fork vnode
+ * before we discover the error (the file has gone open-unlinked).  In this case only,
+ * we may return a vnode in the output argument despite an error.
+ * 
+ * If can_drop_lock is set, then it is safe for this function to temporarily drop
+ * and then re-acquire the cnode lock.  We may need to do this, for example, in order to 
+ * acquire an iocount or promote our lock.  
+ * 
+ * error_on_unlinked is an argument which indicates that we are to return an error if we 
+ * discover that the cnode has gone into an open-unlinked state ( C_DELETED or C_NOEXISTS)
+ * is set in the cnode flags.  This is only necessary if can_drop_lock is true, otherwise 
+ * there's really no reason to double-check for errors on the cnode.
  */
  */
+
 __private_extern__
 int
 __private_extern__
 int
-hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, struct vnode **rvpp, int can_drop_lock)
+hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, 
+               struct vnode **rvpp, int can_drop_lock, int error_on_unlinked)
 {
        struct vnode *rvp;
        struct vnode *dvp = NULLVP;
        struct cnode *cp = VTOC(vp);
        int error;
        int vid;
 {
        struct vnode *rvp;
        struct vnode *dvp = NULLVP;
        struct cnode *cp = VTOC(vp);
        int error;
        int vid;
+       int delete_status = 0;
+
+
+       /*
+        * Need to check the status of the cnode to validate it hasn't
+        * gone open-unlinked on us before we can actually do work with it.
+        */
+       delete_status = hfs_checkdeleted (cp);
+       if ((delete_status) && (error_on_unlinked)) {
+               return delete_status;
+       }
 
 restart:
        /* Attempt to use exising vnode */
 
 restart:
        /* Attempt to use exising vnode */
@@ -4324,6 +4391,32 @@ restart:
 
                if (can_drop_lock) {
                        (void) hfs_lock(cp, HFS_FORCE_LOCK);
 
                if (can_drop_lock) {
                        (void) hfs_lock(cp, HFS_FORCE_LOCK);
+
+                       /*
+                        * When we relinquished our cnode lock, the cnode could have raced
+                        * with a delete and gotten deleted.  If the caller did not want
+                        * us to ignore open-unlinked files, then re-check the C_DELETED
+                        * state and see if we need to return an ENOENT here because the item
+                        * got deleted in the intervening time.
+                        */
+                       if (error_on_unlinked) {
+                               if ((delete_status = hfs_checkdeleted(cp))) {
+                                       /* 
+                                        * If error == 0, this means that we succeeded in acquiring an iocount on the 
+                                        * rsrc fork vnode.  However, if we're in this block of code, that 
+                                        * means that we noticed that the cnode has gone open-unlinked.  In 
+                                        * this case, the caller requested that we not do any other work and 
+                                        * return an errno.  The caller will be responsible for dropping the 
+                                        * iocount we just acquired because we can't do it until we've released 
+                                        * the cnode lock.  
+                                        */
+                                       if (error == 0) {
+                                               *rvpp = rvp;
+                                       }
+                                       return delete_status;
+                               }
+                       }
+
                        /*
                         * When our lock was relinquished, the resource fork
                         * could have been recycled.  Check for this and try
                        /*
                         * When our lock was relinquished, the resource fork
                         * could have been recycled.  Check for this and try
@@ -4359,7 +4452,7 @@ restart:
                                return (EINVAL);
                        }
                        /*
                                return (EINVAL);
                        }
                        /*
-                        * If the upgrade fails we loose the lock and
+                        * If the upgrade fails we lose the lock and
                         * have to take the exclusive lock on our own.
                         */
                        if (lck_rw_lock_shared_to_exclusive(&cp->c_rwlock) == FALSE)
                         * have to take the exclusive lock on our own.
                         */
                        if (lck_rw_lock_shared_to_exclusive(&cp->c_rwlock) == FALSE)
@@ -4372,9 +4465,17 @@ restart:
                 * C_DELETED.  This is because we need to continue to provide rsrc
                 * fork access to open-unlinked files.  In this case, build a fake descriptor
                 * like in hfs_removefile.  If we don't do this, buildkey will fail in
                 * C_DELETED.  This is because we need to continue to provide rsrc
                 * fork access to open-unlinked files.  In this case, build a fake descriptor
                 * like in hfs_removefile.  If we don't do this, buildkey will fail in
-                * cat_lookup because this cnode has no name in its descriptor.
+                * cat_lookup because this cnode has no name in its descriptor. However,
+                * only do this if the caller did not specify that they wanted us to
+                * error out upon encountering open-unlinked files.
                 */
 
                 */
 
+               if ((error_on_unlinked) && (can_drop_lock)) {
+                       if ((error = hfs_checkdeleted (cp))) {
+                               return error;
+                       }
+               }
+
                if ((cp->c_flag & C_DELETED ) && (cp->c_desc.cd_namelen == 0)) {
                        bzero (&to_desc, sizeof(to_desc));
                        bzero (delname, 32);
                if ((cp->c_flag & C_DELETED ) && (cp->c_desc.cd_namelen == 0)) {
                        bzero (&to_desc, sizeof(to_desc));
                        bzero (delname, 32);
index 915fbe874ad56ab6d29623112c33caf8a079a1df..f552b9c7558ff0e6378e42c755fc3aa7a1cea902 100644 (file)
@@ -141,7 +141,7 @@ hfs_vnop_getnamedstream(struct vnop_getnamedstream_args* ap)
                hfs_unlock(cp);
                return (ENOATTR);
        }
                hfs_unlock(cp);
                return (ENOATTR);
        }
-       error = hfs_vgetrsrc(VTOHFS(vp), vp, svpp, TRUE);
+       error = hfs_vgetrsrc(VTOHFS(vp), vp, svpp, TRUE, FALSE);
        hfs_unlock(cp);
 
        return (error);
        hfs_unlock(cp);
 
        return (error);
@@ -184,7 +184,7 @@ hfs_vnop_makenamedstream(struct vnop_makenamedstream_args* ap)
        if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
                return (error);
        }
        if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
                return (error);
        }
-       error = hfs_vgetrsrc(VTOHFS(vp), vp, svpp, TRUE);
+       error = hfs_vgetrsrc(VTOHFS(vp), vp, svpp, TRUE, FALSE);
        hfs_unlock(cp);
 
        return (error);
        hfs_unlock(cp);
 
        return (error);
@@ -328,7 +328,7 @@ hfs_vnop_getxattr(struct vnop_getxattr_args *ap)
                                openunlinked = 1;
                        }
                        
                                openunlinked = 1;
                        }
                        
-                       result = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE);
+                       result = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, FALSE);
                        hfs_unlock(cp);
                        if (result) {
                                return (result);
                        hfs_unlock(cp);
                        if (result) {
                                return (result);
@@ -719,7 +719,7 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap)
                        openunlinked = 1;
                }
 
                        openunlinked = 1;
                }
 
-               result = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE);
+               result = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, FALSE);
                hfs_unlock(cp);
                if (result) {
                        return (result);
                hfs_unlock(cp);
                if (result) {
                        return (result);
@@ -1096,7 +1096,7 @@ hfs_vnop_removexattr(struct vnop_removexattr_args *ap)
                        hfs_unlock(cp);
                        return (ENOATTR);
                }
                        hfs_unlock(cp);
                        return (ENOATTR);
                }
-               result = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE);
+               result = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, FALSE);
                hfs_unlock(cp);
                if (result) {
                        return (result);
                hfs_unlock(cp);
                if (result) {
                        return (result);
@@ -2302,9 +2302,9 @@ free_attr_blks(struct hfsmount *hfsmp, int blkcnt, HFSPlusExtentDescriptor *exte
                        break;
                }
                (void)BlockDeallocate(hfsmp, extents[i].startBlock, extents[i].blockCount);
                        break;
                }
                (void)BlockDeallocate(hfsmp, extents[i].startBlock, extents[i].blockCount);
+               remblks -= extents[i].blockCount;
                extents[i].startBlock = 0;
                extents[i].blockCount = 0;
                extents[i].startBlock = 0;
                extents[i].blockCount = 0;
-               remblks -= extents[i].blockCount;
 
 #if HFS_XATTR_VERBOSE
                printf("hfs: free_attr_blks: BlockDeallocate [%d, %d]\n",
 
 #if HFS_XATTR_VERBOSE
                printf("hfs: free_attr_blks: BlockDeallocate [%d, %d]\n",
index 64c7b86f0cde969922909410077f313f9b1470d7..99d586408da2425df6fc365de6c9f5f0beecd042 100644 (file)
@@ -696,6 +696,18 @@ BTZeroUnusedNodes(FCB *filePtr)
                                                goto ErrorExit;
                                        }
                                        
                                                goto ErrorExit;
                                        }
                                        
+                                       if (buf_flags(bp) & B_LOCKED) {
+                                               /* 
+                                                * This node is already part of a transaction and will be
+                                                * written when the transaction is committed so don't write it here.
+                                                * If we did, then we'd hit a panic in hfs_vnop_bwrite since
+                                                * B_LOCKED is still set
+                                                */
+                                               buf_brelse(bp);
+                                               continue;
+                                       }
+
+                                       
                                        buf_clear(bp);
                                        buf_markaged(bp);
                                        
                                        buf_clear(bp);
                                        buf_markaged(bp);
                                        
index 6325962b2cfe1fc2b08c0ec248822361a30fffec..0ed79dc69acc3cfcf564ab0ab0b7fda0099a0fe6 100644 (file)
@@ -123,8 +123,8 @@ imageboot_setup()
        error = vfs_mountroot();
 
        if (error == 0 && rootvnode != NULL) {
        error = vfs_mountroot();
 
        if (error == 0 && rootvnode != NULL) {
-               struct vnode *tvp;
-               struct vnode *newdp;
+               vnode_t newdp, old_rootvnode;
+               mount_t new_rootfs, old_rootfs;
 
                /*
                 * Get the vnode for '/'.
@@ -133,17 +133,45 @@ imageboot_setup()
                if (VFS_ROOT(TAILQ_LAST(&mountlist,mntlist), &newdp, vfs_context_kernel()))
                        panic("%s: cannot find root vnode", __FUNCTION__);
 
+               old_rootvnode = rootvnode;
+               old_rootfs = rootvnode->v_mount;
+
+               mount_list_remove(old_rootfs);
+
+               mount_lock(old_rootfs);
+#ifdef CONFIG_IMGSRC_ACCESS
+               old_rootfs->mnt_kern_flag |= MNTK_BACKS_ROOT;
+#endif /* CONFIG_IMGSRC_ACCESS */
+               old_rootfs->mnt_flag &= ~MNT_ROOTFS;
+               mount_unlock(old_rootfs);
+
+               rootvnode = newdp;
+
+               new_rootfs = rootvnode->v_mount;
+               mount_lock(new_rootfs);
+               new_rootfs->mnt_flag |= MNT_ROOTFS;
+               mount_unlock(new_rootfs);
+
                vnode_ref(newdp);
                vnode_put(newdp);
-               tvp = rootvnode;
-               vnode_rele(tvp);
                filedesc0.fd_cdir = newdp;
-               rootvnode = newdp;
-               mount_list_lock();
-               TAILQ_REMOVE(&mountlist, TAILQ_FIRST(&mountlist), mnt_list);
-               mount_list_unlock();
-               mountlist.tqh_first->mnt_flag |= MNT_ROOTFS;
                DBG_TRACE("%s: root switched\n", __FUNCTION__);
+
+#ifdef CONFIG_IMGSRC_ACCESS
+               if (PE_imgsrc_mount_supported()) {
+                       imgsrc_rootvnode = old_rootvnode;
+               } else {
+                       vnode_getalways(old_rootvnode);
+                       vnode_rele(old_rootvnode);
+                       vnode_put(old_rootvnode);
+               }
+#else 
+               vnode_getalways(old_rootvnode);
+               vnode_rele(old_rootvnode);
+               vnode_put(old_rootvnode);
+#endif /* CONFIG_IMGSRC_ACCESS */
+
+
        }
 done:
        FREE_ZONE(root_path, MAXPATHLEN, M_NAMEI);
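The rewritten imageboot_setup() replaces the old "remove the first mountlist entry" code with explicit bookkeeping: the old root is taken off the mount list, loses MNT_ROOTFS (and is tagged MNTK_BACKS_ROOT when image-source access is configured), and the freshly mounted image gains MNT_ROOTFS. A sketch of that flag handover, with mocked mount structures and illustrative flag values rather than the xnu mount API:

    #include <stdio.h>

    #define MNT_ROOTFS        0x4000    /* illustrative values */
    #define MNTK_BACKS_ROOT   0x0001

    struct mount { int mnt_flag; int mnt_kern_flag; const char *name; };

    int main(void)
    {
        struct mount old_rootfs = { MNT_ROOTFS, 0, "image source" };
        struct mount new_rootfs = { 0, 0, "imaged root" };

        /* old root no longer *is* the root, but it backs the image */
        old_rootfs.mnt_flag      &= ~MNT_ROOTFS;
        old_rootfs.mnt_kern_flag |=  MNTK_BACKS_ROOT;

        /* the freshly mounted image becomes the root filesystem */
        new_rootfs.mnt_flag |= MNT_ROOTFS;

        printf("%s: flag=%#x kern=%#x\n", old_rootfs.name,
               old_rootfs.mnt_flag, old_rootfs.mnt_kern_flag);
        printf("%s: flag=%#x\n", new_rootfs.name, new_rootfs.mnt_flag);
        return 0;
    }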
index ee97c249c125c54487ad3a96d5755fb5b26d692c..bc3089a8fe6108d0acdbf4b77fc702fc81b0f9c4 100644 (file)
@@ -168,10 +168,10 @@ static lck_mtx_t        stackshot_subsys_mutex;
 void *stackshot_snapbuf = NULL;
 
 int
-stack_snapshot2(pid_t pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t options, int32_t *retval);
+stack_snapshot2(pid_t pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, uint32_t dispatch_offset, int32_t *retval);
 
 extern void
-kdp_snapshot_preflight(int pid, void  *tracebuf, uint32_t tracebuf_size, uint32_t options);
+kdp_snapshot_preflight(int pid, void  *tracebuf, uint32_t tracebuf_size, uint32_t flags, uint32_t dispatch_offset);
 
 extern int
 kdp_stack_snapshot_geterror(void);
@@ -1705,11 +1705,11 @@ stack_snapshot(struct proc *p, register struct stack_snapshot_args *uap, int32_t
                 return(error);
 
        return stack_snapshot2(uap->pid, uap->tracebuf, uap->tracebuf_size,
-           uap->options, retval);
+           uap->flags, uap->dispatch_offset, retval);
 }
 
 int
-stack_snapshot2(pid_t pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t options, int32_t *retval)
+stack_snapshot2(pid_t pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, uint32_t dispatch_offset, int32_t *retval)
 {
        int error = 0;
        unsigned bytesTraced = 0;
@@ -1730,7 +1730,7 @@ stack_snapshot2(pid_t pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_
                goto error_exit;
        }
 /* Preload trace parameters*/  
-       kdp_snapshot_preflight(pid, stackshot_snapbuf, tracebuf_size, options);
+       kdp_snapshot_preflight(pid, stackshot_snapbuf, tracebuf_size, flags, dispatch_offset);
 
 /* Trap to the debugger to obtain a coherent stack snapshot; this populates
  * the trace buffer
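The stack_snapshot changes rename options to flags and add a dispatch_offset argument, both of which travel unchanged from the syscall layer through stack_snapshot2() into kdp_snapshot_preflight(). A sketch of that pass-through plumbing (names mirror the diff, bodies are mocked):

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t snap_flags, snap_dispatch_offset;

    static void kdp_snapshot_preflight(int pid, void *buf, uint32_t size,
                                       uint32_t flags, uint32_t dispatch_offset)
    {
        (void)pid; (void)buf; (void)size;
        snap_flags = flags;                 /* stashed for the trap handler */
        snap_dispatch_offset = dispatch_offset;
    }

    static int stack_snapshot2(int pid, void *buf, uint32_t size,
                               uint32_t flags, uint32_t dispatch_offset)
    {
        kdp_snapshot_preflight(pid, buf, size, flags, dispatch_offset);
        return 0;
    }

    int main(void)
    {
        char buf[64];
        stack_snapshot2(1, buf, sizeof(buf), 0x2 /* flags */, 0x48 /* offset */);
        printf("flags=%#x dispatch_offset=%#x\n",
               snap_flags, snap_dispatch_offset);
        return 0;
    }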
index 1a0f609ca828a718151f96e7156ecaa605a85eb0..5d195dcf0d628e111cd4dbaec3529a842b84fc8e 100644 (file)
@@ -1564,6 +1564,17 @@ kevent_register(struct kqueue *kq, struct kevent64_s *kev, __unused struct proc
                                knote_enqueue(kn);
                }
 
+               /*
+                * The user may change some filter values after the
+                * initial EV_ADD, but doing so will not reset any 
+                * filter which have already been triggered.
+                */
+               kn->kn_kevent.udata = kev->udata;
+               if (fops->f_isfd || fops->f_touch == NULL) {
+                       kn->kn_sfflags = kev->fflags;
+                       kn->kn_sdata = kev->data;
+               }
+
                /*
                 * If somebody is in the middle of dropping this
                 * knote - go find/insert a new one.  But we have
@@ -1578,17 +1589,11 @@ kevent_register(struct kqueue *kq, struct kevent64_s *kev, __unused struct proc
                }
 
                /*
-                * The user may change some filter values after the
-                * initial EV_ADD, but doing so will not reset any 
-                * filter which have already been triggered.
+                * Call touch routine to notify filter of changes
+                * in filter values.
                 */
-               kn->kn_kevent.udata = kev->udata;
                if (!fops->f_isfd && fops->f_touch != NULL)
                        fops->f_touch(kn, kev, EVENT_REGISTER);
-               else {
-                       kn->kn_sfflags = kev->fflags;
-                       kn->kn_sdata = kev->data;
-               }
 
                /* We may need to push some info down to a networked filesystem */
                if (kn->kn_filter == EVFILT_VNODE) {
@@ -1680,13 +1685,10 @@ knote_process(struct knote      *kn,
                                }
 
                                /* capture the kevent data - using touch if specified */
-                               if (result) {
-                                       if (touch) {
-                                               kn->kn_fop->f_touch(kn, &kev, EVENT_PROCESS);
-                                       } else {
-                                               kev = kn->kn_kevent;
-                                       }
+                               if (result && touch) {
+                                       kn->kn_fop->f_touch(kn, &kev, EVENT_PROCESS);
                                }
+
                                /* convert back to a kqlock - bail if the knote went away */
                                if (!knoteuse2kqlock(kq, kn)) {
                                        return EJUSTRETURN;
@@ -1695,6 +1697,12 @@ knote_process(struct knote       *kn,
                                        if (!(kn->kn_status & KN_ACTIVE)) {
                                                knote_activate(kn, 0);
                                        }
+
+                                       /* capture all events that occurred during filter */
+                                       if (!touch) {
+                                               kev = kn->kn_kevent;
+                                       }
+
                                } else if ((kn->kn_status & KN_STAYQUEUED) == 0) {
                                        /* was already dequeued, so just bail on this one */
                                        return EJUSTRETURN;
@@ -1724,21 +1732,26 @@ knote_process(struct knote      *kn,
 
        if (result == 0) {
                return EJUSTRETURN;
-       } else if (kn->kn_flags & EV_ONESHOT) {
+       } else if ((kn->kn_flags & EV_ONESHOT) != 0) {
                knote_deactivate(kn);
                if (kqlock2knotedrop(kq, kn)) {
                        kn->kn_fop->f_detach(kn);
                        knote_drop(kn, p);
                }
-       } else if (kn->kn_flags & (EV_CLEAR | EV_DISPATCH)) {
-               knote_deactivate(kn);
-               /* manually clear knotes who weren't 'touch'ed */
-               if ((touch == 0) && (kn->kn_flags & EV_CLEAR)) {
+       } else if ((kn->kn_flags & (EV_CLEAR | EV_DISPATCH)) != 0) {
+               if ((kn->kn_flags & EV_DISPATCH) != 0) {
+                       /* deactivate and disable all dispatch knotes */
+                       knote_deactivate(kn);
+                       kn->kn_status |= KN_DISABLED;
+               } else if (!touch || kn->kn_fflags == 0) {
+                       /* only deactivate if nothing since the touch */
+                       knote_deactivate(kn);
+               }
+               if (!touch && (kn->kn_flags & EV_CLEAR) != 0) {
+                       /* manually clear non-touch knotes */
                        kn->kn_data = 0;
                        kn->kn_fflags = 0;
                }
-               if (kn->kn_flags & EV_DISPATCH)
-                       kn->kn_status |= KN_DISABLED;
                kqunlock(kq);
        } else {
                /*
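The reworked knote_process() deactivates and disables every EV_DISPATCH knote after delivery, so userland sees exactly one event until it re-enables the filter. A userland sketch of that behavior, assuming the EVFILT_USER filter and the EV_DISPATCH flag available on this era of Darwin (delivery counts in the comments are the expected outcome, not guaranteed on every release):

    #include <sys/event.h>
    #include <sys/time.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        struct kevent kev;
        struct timespec zero = { 0, 0 };
        int kq = kqueue(), n;

        /* register a user event with dispatch semantics */
        EV_SET(&kev, 1, EVFILT_USER, EV_ADD | EV_DISPATCH, 0, 0, NULL);
        kevent(kq, &kev, 1, NULL, 0, NULL);

        /* trigger and drain: one delivery, after which the knote is disabled */
        EV_SET(&kev, 1, EVFILT_USER, 0, NOTE_TRIGGER, 0, NULL);
        kevent(kq, &kev, 1, NULL, 0, NULL);
        n = kevent(kq, NULL, 0, &kev, 1, &zero);
        printf("after trigger: %d event(s)\n", n);      /* expect 1 */

        /* trigger again: the disabled knote stays quiet */
        EV_SET(&kev, 1, EVFILT_USER, 0, NOTE_TRIGGER, 0, NULL);
        kevent(kq, &kev, 1, NULL, 0, NULL);
        n = kevent(kq, NULL, 0, &kev, 1, &zero);
        printf("while disabled: %d event(s)\n", n);     /* expect 0 */

        /* EV_ENABLE re-arms delivery of the pending activation */
        EV_SET(&kev, 1, EVFILT_USER, EV_ENABLE, 0, 0, NULL);
        kevent(kq, &kev, 1, NULL, 0, NULL);
        n = kevent(kq, NULL, 0, &kev, 1, &zero);
        printf("after enable: %d event(s)\n", n);       /* expect 1 */

        close(kq);
        return 0;
    }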
index f5e141455921d02845e6ec2b974a49d3dc47977f..35e9a43a685b85dd66234d4bade9f89cb9a31172 100644 (file)
@@ -315,7 +315,7 @@ sysctl_handle_kern_memorystatus_priority_list(__unused struct sysctl_oid *oid, _
 #if DEBUG 
                printf("set jetsam priority pids = { ");
                for (i = 0; i < jetsam_priority_list_count; i++) {
-                       printf("%d ", temp_list[i].pid);
+                       printf("(%d, 0x%08x, %d) ", temp_list[i].pid, temp_list[i].flags, temp_list[i].hiwat_pages);
                }
                printf("}\n");
 #endif /* DEBUG */
@@ -326,6 +326,10 @@ sysctl_handle_kern_memorystatus_priority_list(__unused struct sysctl_oid *oid, _
                for (i = jetsam_priority_list_count; i < kMaxPriorityEntries; i++) {
                        jetsam_priority_list[i].pid = 0;
                        jetsam_priority_list[i].flags = 0;
+                       jetsam_priority_list[i].hiwat_pages = -1;
+                       jetsam_priority_list[i].hiwat_reserved1 = -1;
+                       jetsam_priority_list[i].hiwat_reserved2 = -1;
+                       jetsam_priority_list[i].hiwat_reserved3 = -1;
                }
                jetsam_priority_list_index = 0;
                lck_mtx_unlock(jetsam_list_mlock);
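The jetsam hunk initializes the unused tail of the priority list with -1 in hiwat_pages and the reserved fields, so "no high-water mark" is distinguishable from a stale or zero limit. A sketch of that sentinel convention (the entry layout is an illustrative stand-in for the jetsam priority entry):

    #include <stdio.h>

    #define MAX_ENTRIES 4

    struct entry { int pid; unsigned flags; int hiwat_pages; };

    int main(void)
    {
        struct entry list[MAX_ENTRIES];
        int used = 2;
        list[0] = (struct entry){ 101, 0, 1024 };
        list[1] = (struct entry){ 102, 0, -1 };

        /* clear the tail: -1 means "no page high-water mark", which 0
         * would not express (0 is a legal, immediately-exceeded limit) */
        for (int i = used; i < MAX_ENTRIES; i++) {
            list[i].pid = 0;
            list[i].flags = 0;
            list[i].hiwat_pages = -1;
        }
        for (int i = 0; i < MAX_ENTRIES; i++)
            printf("pid=%d hiwat=%d\n", list[i].pid, list[i].hiwat_pages);
        return 0;
    }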
index 02166d578bc6aa04041b641746da756ed6be35bc..6da43d2fdb95e5cc796bd51a0b271e30971f87d4 100644 (file)
@@ -690,7 +690,7 @@ mprotect(__unused proc_t p, struct mprotect_args *uap, __unused int32_t *retval)
 
        user_addr = (mach_vm_offset_t) uap->addr;
        user_size = (mach_vm_size_t) uap->len;
-       prot = (vm_prot_t)(uap->prot & VM_PROT_ALL);
+       prot = (vm_prot_t)(uap->prot & (VM_PROT_ALL | VM_PROT_TRUSTED));
 
        if (user_addr & PAGE_MASK_64) {
                /* UNIX SPEC: user address is not page-aligned, return EINVAL */
@@ -728,6 +728,34 @@ mprotect(__unused proc_t p, struct mprotect_args *uap, __unused int32_t *retval)
        if (error)
                return (error);
 #endif
+
+       if(prot & VM_PROT_TRUSTED) {
+#if CONFIG_DYNAMIC_CODE_SIGNING
+               /* CODE SIGNING ENFORCEMENT - JIT support */
+               /* The special protection value VM_PROT_TRUSTED requests that we treat
+                * this page as if it had a valid code signature.
+                * If this is enabled, there MUST be a MAC policy implementing the 
+                * mac_proc_check_mprotect() hook above. Otherwise, Codesigning will be
+                * compromised because the check would always succeed and thusly any
+                * process could sign dynamically. */
+               result = vm_map_sign(user_map, 
+                                    vm_map_trunc_page(user_addr), 
+                                    vm_map_round_page(user_addr+user_size));
+               switch (result) {
+                       case KERN_SUCCESS:
+                               break;
+                       case KERN_INVALID_ADDRESS:
+                               /* UNIX SPEC: for an invalid address range, return ENOMEM */
+                               return ENOMEM;
+                       default:
+                               return EINVAL;
+               }
+#else
+               return ENOTSUP;
+#endif
+       }
+       prot &= ~VM_PROT_TRUSTED;
+       
        result = mach_vm_protect(user_map, user_addr, user_size,
                                 FALSE, prot);
        switch (result) {
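With CONFIG_DYNAMIC_CODE_SIGNING, the mprotect() hunk lets a JIT pass the extra VM_PROT_TRUSTED bit to request that freshly generated code be treated as validly signed (gated by a mac_proc_check_mprotect() policy); the bit is stripped before the normal protection change. A hedged userland sketch; VM_PROT_TRUSTED is taken from <mach/vm_prot.h> only where that header defines it, and the call fails with ENOTSUP on kernels built without dynamic code signing:

    #include <sys/mman.h>
    #include <mach/vm_prot.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        size_t len = (size_t)getpagesize();
        /* writable scratch page where generated code would be emitted */
        void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                       MAP_ANON | MAP_PRIVATE, -1, 0);
        if (p == MAP_FAILED) { perror("mmap"); return 1; }

        /* ... emit machine code into p here ... */

    #ifdef VM_PROT_TRUSTED
        /* ask the kernel to treat the page as validly signed, then execute */
        if (mprotect(p, len, PROT_READ | PROT_EXEC | VM_PROT_TRUSTED) != 0)
            perror("mprotect(VM_PROT_TRUSTED)");  /* ENOTSUP if unsupported */
    #else
        if (mprotect(p, len, PROT_READ | PROT_EXEC) != 0)
            perror("mprotect");
    #endif

        munmap(p, len);
        return 0;
    }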
index b51c4ecbe35ae692fcbd3d52c6db058e8382134d..02b61872ab463d2ebf642bd66fac6081f56f90c7 100644 (file)
 
 int    donice(struct proc *curp, struct proc *chgp, int n);
 int    dosetrlimit(struct proc *p, u_int which, struct rlimit *limp);
+static void do_background_socket(struct proc *curp, thread_t thread, int priority);
 static int do_background_thread(struct proc *curp, int priority);
+static int do_background_task(struct proc *curp, int priority);
 
 rlim_t maxdmap = MAXDSIZ;      /* XXX */ 
 rlim_t maxsmap = MAXSSIZ - PAGE_SIZE;  /* XXX */ 
@@ -369,10 +371,35 @@ setpriority(struct proc *curp, struct setpriority_args *uap, __unused int32_t *r
                        return (EINVAL);
                }
                error = do_background_thread(curp, uap->prio);
+               (void) do_background_socket(curp, current_thread(), uap->prio);
                found++;
                break;
        }
 
+       case PRIO_DARWIN_PROCESS: {
+               if (uap->who == 0)
+                       p = curp;
+               else {
+                       p = proc_find(uap->who);
+                       if (p == 0)
+                               break;
+                       refheld = 1;
+               }
+
+               error = do_background_task(p, uap->prio);
+               (void) do_background_socket(p, NULL, uap->prio);
+               
+               proc_lock(p);
+               p->p_iopol_disk = (uap->prio == PRIO_DARWIN_BG ? 
+                               IOPOL_THROTTLE : IOPOL_DEFAULT); 
+               proc_unlock(p);
+
+               found++;
+               if (refheld != 0)
+                       proc_rele(p);
+               break;
+       }
+
        default:
                return (EINVAL);
        }
@@ -427,20 +454,93 @@ out:
        return (error);
 }
 
+static int
+do_background_task(struct proc *p, int priority)
+{
+       int error = 0;
+       task_category_policy_data_t info;
+
+       if (priority & PRIO_DARWIN_BG) { 
+               info.role = TASK_THROTTLE_APPLICATION;
+       } else {
+               info.role = TASK_DEFAULT_APPLICATION;
+       }
+
+       error = task_policy_set(p->task,
+                       TASK_CATEGORY_POLICY,
+                       (task_policy_t) &info,
+                       TASK_CATEGORY_POLICY_COUNT);
+       return (error);
+}
+
+static void 
+do_background_socket(struct proc *curp, thread_t thread, int priority)
+{
+       struct filedesc                     *fdp;
+       struct fileproc                     *fp;
+       int                                 i;
+
+       if (priority & PRIO_DARWIN_BG) {
+               /* enable network throttle process-wide (if no thread is specified) */
+               if (thread == NULL) {
+                       proc_fdlock(curp);
+                       fdp = curp->p_fd;
+
+                       for (i = 0; i < fdp->fd_nfiles; i++) {
+                               struct socket       *sockp;
+
+                               fp = fdp->fd_ofiles[i];
+                               if (fp == NULL || (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
+                                               fp->f_fglob->fg_type != DTYPE_SOCKET) {
+                                       continue;
+                               }
+                               sockp = (struct socket *)fp->f_fglob->fg_data;
+                               sockp->so_traffic_mgt_flags |= TRAFFIC_MGT_SO_BACKGROUND;
+                               sockp->so_background_thread = NULL;
+                       }
+                       proc_fdunlock(curp);
+               }
+
+       } else {
+               /* disable networking IO throttle.
+                * NOTE - It is a known limitation of the current design that we 
+                * could potentially clear TRAFFIC_MGT_SO_BACKGROUND bit for 
+                * sockets created by other threads within this process.  
+                */
+               proc_fdlock(curp);
+               fdp = curp->p_fd;
+               for ( i = 0; i < fdp->fd_nfiles; i++ ) {
+                       struct socket       *sockp;
+
+                       fp = fdp->fd_ofiles[ i ];
+                       if ( fp == NULL || (fdp->fd_ofileflags[ i ] & UF_RESERVED) != 0 ||
+                                       fp->f_fglob->fg_type != DTYPE_SOCKET ) {
+                               continue;
+                       }
+                       sockp = (struct socket *)fp->f_fglob->fg_data;
+                       /* skip if only clearing this thread's sockets */
+                       if ((thread) && (sockp->so_background_thread != thread)) {
+                               continue;
+                       }
+                       sockp->so_traffic_mgt_flags &= ~TRAFFIC_MGT_SO_BACKGROUND;
+                       sockp->so_background_thread = NULL;
+               }
+               proc_fdunlock(curp);
+       }
+}
+
+
 /*
  * do_background_thread
  * Returns:    0                       Success
  * XXX - todo - does this need a MACF hook?
  */
 static int
-do_background_thread(struct proc *curp, int priority)
+do_background_thread(struct proc *curp __unused, int priority)
 {
-       int                                                                     i;
        thread_t                                                        thread;
        struct uthread                                          *ut;
        thread_precedence_policy_data_t         policy;
-       struct filedesc                                         *fdp;
-       struct fileproc                                         *fp;
        
        thread = current_thread();
        ut = get_bsdthread_info(thread);
@@ -461,31 +561,6 @@ do_background_thread(struct proc *curp, int priority)
                thread_policy_set( thread, THREAD_PRECEDENCE_POLICY,
                                                   (thread_policy_t)&policy,
                                                   THREAD_PRECEDENCE_POLICY_COUNT );
-
-               /* disable networking IO throttle.
-                * NOTE - It is a known limitation of the current design that we 
-                * could potentially clear TRAFFIC_MGT_SO_BACKGROUND bit for 
-                * sockets created by other threads within this process.  
-                */
-               proc_fdlock(curp);
-               fdp = curp->p_fd;
-               for ( i = 0; i < fdp->fd_nfiles; i++ ) {
-                       struct socket           *sockp;
-                       
-                       fp = fdp->fd_ofiles[ i ];
-                       if ( fp == NULL || (fdp->fd_ofileflags[ i ] & UF_RESERVED) != 0 || 
-                                fp->f_fglob->fg_type != DTYPE_SOCKET ) {
-                               continue;
-                       }
-                       sockp = (struct socket *)fp->f_fglob->fg_data;
-                       if ( sockp->so_background_thread != thread ) {
-                               continue;
-                       }
-                       sockp->so_traffic_mgt_flags &= ~TRAFFIC_MGT_SO_BACKGROUND;
-                       sockp->so_background_thread = NULL;
-               }
-               proc_fdunlock(curp);
-
                return(0);
        }
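The new PRIO_DARWIN_PROCESS case lets a caller background an entire process: do_background_task() sets the task role to TASK_THROTTLE_APPLICATION, the disk I/O policy becomes IOPOL_THROTTLE, and do_background_socket() flags every socket in the process TRAFFIC_MGT_SO_BACKGROUND (the per-socket loop that used to live in do_background_thread). A userland usage sketch, assuming PRIO_DARWIN_PROCESS and PRIO_DARWIN_BG from <sys/resource.h> on Darwin builds that expose them:

    #include <sys/resource.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
    #if defined(PRIO_DARWIN_PROCESS) && defined(PRIO_DARWIN_BG)
        /* throttle this whole process: CPU role, disk I/O and sockets */
        if (setpriority(PRIO_DARWIN_PROCESS, getpid(), PRIO_DARWIN_BG) != 0)
            perror("setpriority(PRIO_DARWIN_PROCESS)");

        /* ... do low-importance work ... */

        /* restore normal scheduling, I/O policy and socket treatment */
        if (setpriority(PRIO_DARWIN_PROCESS, getpid(), 0) != 0)
            perror("setpriority(restore)");
    #else
        printf("PRIO_DARWIN_PROCESS not available on this system\n");
    #endif
        return 0;
    }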
        
index 7303287c109272dfbb47ab896198fbe60360bd72..842a3e5720abbcca16c9ca4a9562fa15ae92f54d 100644 (file)
@@ -2365,6 +2365,47 @@ SYSCTL_PROC(_kern, KERN_NETBOOT, netboot,
                0, 0, sysctl_netboot, "I", "");
 #endif
 
+#ifdef CONFIG_IMGSRC_ACCESS
+static int
+sysctl_imgsrcdev 
+(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
+{
+       vfs_context_t ctx = vfs_context_current();
+       vnode_t devvp;
+       int result;
+
+       if (!vfs_context_issuser(ctx)) {
+               return EPERM;
+       }    
+
+       if (imgsrc_rootvnode == NULL) {
+               return ENOENT;
+       }    
+
+       result = vnode_getwithref(imgsrc_rootvnode);
+       if (result != 0) {
+               return result;
+       }
+       
+       devvp = vnode_mount(imgsrc_rootvnode)->mnt_devvp;
+       result = vnode_getwithref(devvp);
+       if (result != 0) {
+               goto out;
+       }
+
+       result = sysctl_io_number(req, vnode_specrdev(devvp), sizeof(dev_t), NULL, NULL);
+
+       vnode_put(devvp);
+out:
+       vnode_put(imgsrc_rootvnode);
+       return result;
+}
+
+SYSCTL_PROC(_kern, OID_AUTO, imgsrcdev,
+               CTLTYPE_INT | CTLFLAG_RD,
+               0, 0, sysctl_imgsrcdev, "I", ""); 
+#endif /* CONFIG_IMGSRC_ACCESS */
+
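The new kern.imgsrcdev sysctl exposes, to the superuser only, the dev_t of the device backing the original root once the system has switched onto a disk image. A sketch of reading it from userland; the call fails with EPERM, ENOENT or an unknown-OID error unless the kernel was built with CONFIG_IMGSRC_ACCESS and an image-source root exists:

    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <stdio.h>

    int main(void)
    {
        dev_t dev;
        size_t len = sizeof(dev);

        if (sysctlbyname("kern.imgsrcdev", &dev, &len, NULL, 0) != 0) {
            perror("kern.imgsrcdev");
            return 1;
        }
        printf("image source device: major %d minor %d\n",
               major(dev), minor(dev));
        return 0;
    }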
 static int
 sysctl_usrstack
 (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
@@ -2815,3 +2856,12 @@ SYSCTL_INT (_kern, OID_AUTO, stack_size,
 SYSCTL_INT (_kern, OID_AUTO, stack_depth_max,
            CTLFLAG_RD, (int *) &kernel_stack_depth_max, 0, "Max kernel stack depth at interrupt or context switch");
 
+/*
+ * enable back trace for port allocations
+ */
+extern int ipc_portbt;
+
+SYSCTL_INT(_kern, OID_AUTO, ipc_portbt, 
+               CTLFLAG_RW | CTLFLAG_KERN, 
+               &ipc_portbt, 0, "");
+
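The kern.ipc_portbt knob toggles backtrace recording for Mach port allocations. A sketch of flipping it from userland (root is required to set it):

    #include <sys/sysctl.h>
    #include <stdio.h>

    int main(void)
    {
        int enable = 1, old = 0;
        size_t oldlen = sizeof(old);

        if (sysctlbyname("kern.ipc_portbt", &old, &oldlen,
                         &enable, sizeof(enable)) != 0)
            perror("kern.ipc_portbt");
        else
            printf("port allocation backtraces: %d -> %d\n", old, enable);
        return 0;
    }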
index 8c58b3ece4e5fd1c3597a341e4077807c21e4eb1..df178d791a3cd693602f1ab26ae5d9f2b0bf6b3f 100644 (file)
@@ -136,6 +136,7 @@ static boolean_t workqueue_run_nextitem(proc_t p, struct workqueue *wq, thread_t
 static void wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl,
                       int reuse_thread, int wake_thread, int return_directly);
 static void wq_unpark_continue(void);
+static void wq_unsuspend_continue(void);
 static int setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct threadlist *tl);
 static boolean_t workqueue_addnewthread(struct workqueue *wq);
 static void workqueue_removethread(struct threadlist *tl);
@@ -446,7 +447,6 @@ bsdthread_register(struct proc *p, struct bsdthread_register_args  *uap, __unuse
        return(0);
 }
 
-
 uint32_t wq_yielded_threshold          = WQ_YIELDED_THRESHOLD;
 uint32_t wq_yielded_window_usecs       = WQ_YIELDED_WINDOW_USECS;
 uint32_t wq_stalled_window_usecs       = WQ_STALLED_WINDOW_USECS;
@@ -903,15 +903,11 @@ workqueue_callback(int type, thread_t thread)
                 * the thread lock for the thread being UNBLOCKED
                 * is also held
                 */
-               if (tl->th_suspended) {
-                       OSAddAtomic(-1, &tl->th_suspended);
-                       KERNEL_DEBUG1(0xefffd024, wq, wq->wq_threads_scheduled, tl->th_priority, tl->th_affinity_tag, thread_tid(thread));
-               } else {
-                       OSAddAtomic(1, &wq->wq_thactive_count[tl->th_priority][tl->th_affinity_tag]);
+                OSAddAtomic(1, &wq->wq_thactive_count[tl->th_priority][tl->th_affinity_tag]);
 
-                       KERNEL_DEBUG1(0xefffd020 | DBG_FUNC_END, wq, wq->wq_threads_scheduled, tl->th_priority, tl->th_affinity_tag, thread_tid(thread));
-               }
-               break;
+                KERNEL_DEBUG1(0xefffd020 | DBG_FUNC_END, wq, wq->wq_threads_scheduled, tl->th_priority, tl->th_affinity_tag, thread_tid(thread));
+
+                break;
        }
 }
 
@@ -986,7 +982,7 @@ workqueue_addnewthread(struct workqueue *wq)
        p = wq->wq_proc;
        workqueue_unlock(p);
 
-       kret = thread_create_workq(wq->wq_task, &th);
+       kret = thread_create_workq(wq->wq_task, (thread_continue_t)wq_unsuspend_continue, &th);
 
        if (kret != KERN_SUCCESS)
                goto failed;
@@ -1046,7 +1042,6 @@ workqueue_addnewthread(struct workqueue *wq)
        tl->th_affinity_tag = -1;
        tl->th_priority = WORKQUEUE_NUMPRIOS;
        tl->th_policy = -1;
-       tl->th_suspended = 1;
 
 #if defined(__ppc__)
        //ml_fp_setvalid(FALSE);
@@ -1057,7 +1052,7 @@ workqueue_addnewthread(struct workqueue *wq)
        uth->uu_threadlist = (void *)tl;
 
         workqueue_lock_spin(p);
-
+       
        TAILQ_INSERT_TAIL(&wq->wq_thidlelist, tl, th_entry);
 
        wq->wq_thidlecount++;
@@ -1306,7 +1301,6 @@ workq_kernreturn(struct proc *p, struct workq_kernreturn_args  *uap, __unused in
 
 }
 
-
 void
 workqueue_exit(struct proc *p)
 {
@@ -1457,9 +1451,6 @@ workqueue_removeitem(struct workqueue *wq, int prio, user_addr_t item)
        return (error);
 }
 
-
-
-
 static int workqueue_importance[WORKQUEUE_NUMPRIOS] = 
 {
        2, 0, -2,
@@ -1710,14 +1701,11 @@ grab_idle_thread:
                tl->th_flags &= ~TH_LIST_SUSPENDED;
                reuse_thread = 0;
 
-               thread_sched_call(tl->th_thread, workqueue_callback);
-
        } else if ((tl->th_flags & TH_LIST_BLOCKED) == TH_LIST_BLOCKED) {
                tl->th_flags &= ~TH_LIST_BLOCKED;
-               tl->th_flags |= TH_LIST_BUSY;
                wake_thread = 1;
        }
-       tl->th_flags |= TH_LIST_RUNNING;
+       tl->th_flags |= TH_LIST_RUNNING | TH_LIST_BUSY;
 
        wq->wq_threads_scheduled++;
        wq->wq_thscheduled_count[priority][affinity_tag]++;
@@ -1894,6 +1882,80 @@ parkit:
 }
 
 
+static void
+wq_unsuspend_continue(void)
+{
+       struct uthread *uth = NULL;
+       thread_t th_to_unsuspend;
+       struct threadlist *tl;
+       proc_t  p;
+
+       th_to_unsuspend = current_thread();
+       uth = get_bsdthread_info(th_to_unsuspend);
+
+       if (uth != NULL && (tl = uth->uu_threadlist) != NULL) {
+               
+               if ((tl->th_flags & (TH_LIST_RUNNING | TH_LIST_BUSY)) == TH_LIST_RUNNING) {
+                       /*
+                        * most likely a normal resume of this thread occurred...
+                        * it's also possible that the thread was aborted after we
+                        * finished setting it up so that it could be dispatched... if
+                        * so, thread_bootstrap_return will notice the abort and put
+                        * the thread on the path to self-destruction
+                        */
+normal_resume_to_user:
+                       thread_sched_call(th_to_unsuspend, workqueue_callback);
+
+                       thread_bootstrap_return();
+               }
+               /*
+                * if we get here, it's because we've been resumed due to
+                * an abort of this thread (process is crashing)
+                */
+               p = current_proc();
+
+               workqueue_lock_spin(p);
+
+               if (tl->th_flags & TH_LIST_SUSPENDED) {
+                       /*
+                        * thread has been aborted while still on our idle
+                        * queue... remove it from our domain...
+                        * workqueue_removethread consumes the lock
+                        */
+                       workqueue_removethread(tl);
+
+                       thread_bootstrap_return();
+               }
+               while ((tl->th_flags & TH_LIST_BUSY)) {
+                       /*
+                        * this thread was aborted after we started making
+                        * it runnable, but before we finished dispatching it...
+                        * we need to wait for that process to finish,
+                        * and we need to ask for a wakeup instead of a
+                        * thread_resume since the abort has already resumed us
+                        */
+                       tl->th_flags |= TH_LIST_NEED_WAKEUP;
+
+                       assert_wait((caddr_t)tl, (THREAD_UNINT));
+
+                       workqueue_unlock(p);
+
+                       thread_block(THREAD_CONTINUE_NULL);
+
+                       workqueue_lock_spin(p);
+               }
+               workqueue_unlock(p);
+               /*
+                * we have finished setting up the thread's context...
+                * thread_bootstrap_return will take us through the abort path
+                * where the thread will self destruct
+                */
+               goto normal_resume_to_user;
+       }
+       thread_bootstrap_return();
+}
+
+
 static void
 wq_unpark_continue(void)
 {
@@ -1996,11 +2058,19 @@ wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl,
        } else {
                KERNEL_DEBUG1(0xefffd014 | DBG_FUNC_END, tl->th_workq, 0, 0, thread_tid(current_thread()), thread_tid(th));
 
-               thread_resume(th);
+               workqueue_lock_spin(p);
+
+               if (tl->th_flags & TH_LIST_NEED_WAKEUP)
+                       wakeup(tl);
+               else
+                       thread_resume(th);
+
+               tl->th_flags &= ~(TH_LIST_BUSY | TH_LIST_NEED_WAKEUP);
+               
+               workqueue_unlock(p);
        }
 }
 
-
 int
 setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct threadlist *tl)
 {
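The wq_unsuspend_continue/wq_runitem pair above closes a race when a workqueue thread is aborted mid-dispatch: a thread that still has TH_LIST_BUSY set waits for the dispatcher to finish, and it sets TH_LIST_NEED_WAKEUP so wq_runitem issues a wakeup() instead of a thread_resume(), since the abort has already resumed it. A pthreads analogue of that flag-guarded handshake (mutex and condvar stand in for the kernel's assert_wait/wakeup; this is not the xnu primitive set):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  cv   = PTHREAD_COND_INITIALIZER;
    static int busy = 1;            /* dispatcher still setting us up */
    static int need_wakeup = 0;

    static void *worker(void *arg)
    {
        (void)arg;
        pthread_mutex_lock(&lock);
        while (busy) {              /* resumed early: wait out the setup */
            need_wakeup = 1;        /* ask for a wakeup, not a resume */
            pthread_cond_wait(&cv, &lock);
        }
        pthread_mutex_unlock(&lock);
        printf("worker: setup finished, safe to proceed\n");
        return NULL;
    }

    int main(void)
    {
        pthread_t th;
        pthread_create(&th, NULL, worker, NULL);

        /* dispatcher finishes its setup, then wakes rather than resumes */
        pthread_mutex_lock(&lock);
        busy = 0;
        if (need_wakeup)
            pthread_cond_signal(&cv);
        need_wakeup = 0;
        pthread_mutex_unlock(&lock);

        pthread_join(th, NULL);
        return 0;
    }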
index 3da6c6d55bfa076b1c217afa2272ac0952cf4845..11a276bbd86f328a76b707771392698225a7c4f7 100644 (file)
@@ -239,8 +239,7 @@ pread_nocancel(struct proc *p, struct pread_nocancel_args *uap, user_ssize_t *re
        
        donefileread(p, fp, fd);
 
-       if (!error)
-           KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pread) | DBG_FUNC_NONE),
+       KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pread) | DBG_FUNC_NONE),
              uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);
 
 out:
@@ -531,8 +530,7 @@ errout:
        else
                fp_drop(p, fd, fp, 0);
 
-       if (!error)
-           KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pwrite) | DBG_FUNC_NONE),
+       KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pwrite) | DBG_FUNC_NONE),
              uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);
        
         return(error);
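Dropping the if (!error) guard in pread_nocancel/pwrite_nocancel means the kdebug tracepoints now fire on failed calls too, so short or failing I/O shows up in traces. A sketch of the pattern, with a stand-in trace() in place of KERNEL_DEBUG_CONSTANT:

    #include <stdio.h>

    static void trace(const char *op, int fd, int error)
    {
        printf("trace: %s fd=%d error=%d\n", op, fd, error);
    }

    static int do_pread(int fd, int fail)
    {
        int error = fail ? 9 /* EBADF */ : 0;
        /* old code: if (!error) trace(...);  -- failures were invisible */
        trace("pread", fd, error);
        return error;
    }

    int main(void)
    {
        do_pread(3, 0);
        do_pread(7, 1);
        return 0;
    }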
index d82fc7f835a288b50e115185c1f603654b3fc0af..5ab2dd50b03eacfcfcc57e25f4fb373a5009e309 100644 (file)
 362    AUE_KQUEUE      ALL     { int kqueue(void); } 
 363    AUE_NULL        ALL     { int kevent(int fd, const struct kevent *changelist, int nchanges, struct kevent *eventlist, int nevents, const struct timespec *timeout); } 
 364    AUE_LCHOWN      ALL     { int lchown(user_addr_t path, uid_t owner, gid_t group); }
-365    AUE_STACKSNAPSHOT       ALL     { int stack_snapshot(pid_t pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t options) NO_SYSCALL_STUB; }
+365    AUE_STACKSNAPSHOT       ALL     { int stack_snapshot(pid_t pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, uint32_t dispatch_offset) NO_SYSCALL_STUB; }
 #if CONFIG_WORKQUEUE
 366    AUE_NULL        ALL     { int bsdthread_register(user_addr_t threadstart, user_addr_t wqthread, int pthsize,user_addr_t dummy_value, user_addr_t targetconc_ptr, uint64_t dispatchqueue_offset) NO_SYSCALL_STUB; } 
 367    AUE_WORKQOPEN   ALL     { int workq_open(void) NO_SYSCALL_STUB; }
index 26f38c8f5238edce4f38790a7bf02fbd551ed2b7..202f2d8588dcda57dd3388049473c3e799ec9ef7 100644 (file)
@@ -780,6 +780,8 @@ unp_attach(struct socket *so)
 static void
 unp_detach(struct unpcb *unp)
 {
+       int so_locked = 1;
+
        lck_rw_lock_exclusive(unp_list_mtx);
        LIST_REMOVE(unp, unp_link);
        lck_rw_done(unp_list_mtx);
@@ -805,13 +807,46 @@ unp_detach(struct unpcb *unp)
        if (unp->unp_conn)
                unp_disconnect(unp);
        while (unp->unp_refs.lh_first) {
-               struct unpcb *unp2 = unp->unp_refs.lh_first;
-               socket_unlock(unp->unp_socket, 0);
-               socket_lock(unp2->unp_socket, 1);
-               unp_drop(unp2, ECONNRESET);
-               socket_unlock(unp2->unp_socket, 1);
+               struct unpcb *unp2 = NULL;
+
+               /* This datagram socket is connected to one or more
+                * sockets. In order to avoid a race condition between removing
+                * this reference and closing the connected socket, we need 
+                * to check disconnect_in_progress
+                */
+               if (so_locked == 1) {
+                       socket_unlock(unp->unp_socket, 0);
+                       so_locked = 0;
+               }
+               lck_mtx_lock(unp_disconnect_lock);
+               while (disconnect_in_progress != 0) {
+                       (void)msleep((caddr_t)&disconnect_in_progress, unp_disconnect_lock,
+                               PSOCK, "disconnect", NULL);
+               }
+               disconnect_in_progress = 1;
+               lck_mtx_unlock(unp_disconnect_lock);
+
+               /* Now we are sure that any unpcb socket disconnect is not happening */
+               if (unp->unp_refs.lh_first != NULL) {
+                       unp2 = unp->unp_refs.lh_first;
+                       socket_lock(unp2->unp_socket, 1);
+               }
+               
+               lck_mtx_lock(unp_disconnect_lock);
+               disconnect_in_progress = 0;
+               wakeup(&disconnect_in_progress);
+               lck_mtx_unlock(unp_disconnect_lock);
+                       
+               if (unp2 != NULL) {
+                       /* We already locked this socket and have a reference on it */
+                       unp_drop(unp2, ECONNRESET);
+                       socket_unlock(unp2->unp_socket, 1);
+               }
+       }
+
+       if (so_locked == 0) {
                socket_lock(unp->unp_socket, 0);
+               so_locked = 1;
        }
        soisdisconnected(unp->unp_socket);
        /* makes sure we're getting dealloced */
@@ -1160,9 +1195,7 @@ unp_connect2(struct socket *so, struct socket *so2)
        switch (so->so_type) {
 
        case SOCK_DGRAM:
-               lck_rw_lock_exclusive(unp_list_mtx);
                LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
-               lck_rw_done(unp_list_mtx);
 
                
                /* Avoid lock order reversals due to drop/acquire in soisconnected. */
@@ -1292,9 +1325,7 @@ try_again:
        switch (unp->unp_socket->so_type) {
 
        case SOCK_DGRAM:
-               lck_rw_lock_exclusive(unp_list_mtx);
                LIST_REMOVE(unp, unp_reflink);
-               lck_rw_done(unp_list_mtx);
                unp->unp_socket->so_state &= ~SS_ISCONNECTED;
                socket_unlock(so2, 1);
                break;
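The unp_detach() rework serializes with in-flight disconnects through a disconnect_in_progress flag guarded by unp_disconnect_lock and msleep/wakeup, rather than relying on the reference-list lock that the unp_connect2/unp_disconnect hunks stop taking. A pthreads analogue of that gate (mutex and condvar stand in for unp_disconnect_lock plus msleep/wakeup):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t gate_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  gate_cv   = PTHREAD_COND_INITIALIZER;
    static int disconnect_in_progress = 0;

    static void enter_disconnect(void)
    {
        pthread_mutex_lock(&gate_lock);
        while (disconnect_in_progress)      /* wait for the other side */
            pthread_cond_wait(&gate_cv, &gate_lock);
        disconnect_in_progress = 1;
        pthread_mutex_unlock(&gate_lock);
    }

    static void exit_disconnect(void)
    {
        pthread_mutex_lock(&gate_lock);
        disconnect_in_progress = 0;
        pthread_cond_broadcast(&gate_cv);   /* wakeup(&disconnect_in_progress) */
        pthread_mutex_unlock(&gate_lock);
    }

    int main(void)
    {
        enter_disconnect();
        printf("detach: safe to walk unp_refs, no disconnect can race\n");
        exit_disconnect();
        return 0;
    }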
index 920fbe064e55efe297a3f4b3b5e95876b073f3d5..1ea89d1cc46d125fe1650e4a6befad524bdf9906 100644 (file)
@@ -34,7 +34,8 @@ KERNELFILES= \
 PRIVATE_DATAFILES = \
        if_atm.h if_vlan_var.h if_ppp.h firewire.h \
        ppp_defs.h radix.h if_bond_var.h lacp.h ndrv_var.h \
-       raw_cb.h etherdefs.h iso88025.h if_pflog.h pfvar.h
+       raw_cb.h etherdefs.h iso88025.h if_pflog.h pfvar.h \
+       if_bridgevar.h
 
 PRIVATE_KERNELFILES = ${KERNELFILES} \
        bpfdesc.h dlil_pvt.h ppp_comp.h \
diff --git a/bsd/net/bridge.c b/bsd/net/bridge.c
deleted file mode 100644 (file)
index 01d3cb7..0000000
+++ /dev/null
@@ -1,906 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * Copyright (c) 1998 Luigi Rizzo
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/net/bridge.c,v 1.16.2.14 2001/02/09 23:13:41 luigi Exp $
- */
-
-/*
- * This code implements bridging in FreeBSD. It only acts on ethernet
- * type of interfaces (others are still usable for routing).
- * A bridging table holds the source MAC address/dest. interface for each
- * known node. The table is indexed using an hash of the source address.
- *
- * Input packets are tapped near the beginning of ether_input(), and
- * analysed by calling bridge_in(). Depending on the result, the packet
- * can be forwarded to one or more output interfaces using bdg_forward(),
- * and/or sent to the upper layer (e.g. in case of multicast).
- *
- * Output packets are intercepted near the end of ether_output(),
- * the correct destination is selected calling bridge_dst_lookup(),
- * and then forwarding is done using bdg_forward().
- * Bridging is controlled by the sysctl variable net.link.ether.bridge
- *
- * The arp code is also modified to let a machine answer to requests
- * irrespective of the port the request came from.
- *
- * In case of loops in the bridging topology, the bridge detects this
- * event and temporarily mutes output bridging on one of the ports.
- * Periodically, interfaces are unmuted by bdg_timeout().
- * Muting is only implemented as a safety measure, and also as
- * a mechanism to support a user-space implementation of the spanning
- * tree algorithm. In the final release, unmuting will only occur
- * because of explicit action of the user-level daemon.
- *
- * To build a bridging kernel, use the following option
- *    option BRIDGE
- * and then at runtime set the sysctl variable to enable bridging.
- *
- * Only one interface is supposed to have addresses set (but
- * there are no problems in practice if you set addresses for more
- * than one interface).
- * Bridging will act before routing, but nothing prevents a machine
- * from doing both (modulo bugs in the implementation...).
- *
- * THINGS TO REMEMBER
- *  - bridging is incompatible with multicast routing on the same
- *    machine. There is not an easy fix to this.
- *  - loop detection is still not very robust.
- *  - the interface of bdg_forward() could be improved.
- */
-
-#include <sys/param.h>
-#include <sys/mbuf.h>
-#include <sys/malloc.h>
-#include <sys/systm.h>
-#include <sys/socket.h> /* for net/if.h */
-#include <sys/kernel.h>
-#include <sys/sysctl.h>
-
-#include <net/if.h>
-#include <net/if_types.h>
-
-#include <netinet/in.h> /* for struct arpcom */
-#include <netinet/in_systm.h>
-#include <netinet/in_var.h>
-#include <netinet/ip.h>
-#include <netinet/if_ether.h> /* for struct arpcom */
-
-#include "opt_ipfw.h" 
-#include "opt_ipdn.h" 
-
-#if defined(IPFIREWALL)
-#include <net/route.h>
-#include <netinet/ip_fw.h>
-#if defined(DUMMYNET)
-#include <netinet/ip_dummynet.h>
-#endif
-#endif
-
-#include <net/bridge.h>
-
-/*
- * For debugging, you can use the following macros.
- * remember, rdtsc() only works on Pentium-class machines
-
-    quad_t ticks;
-    DDB(ticks = rdtsc();)
-    ... interesting code ...
-    DDB(bdg_fw_ticks += (u_int32_t)(rdtsc() - ticks) ; bdg_fw_count++ ;)
-
- *
- */
-
-#define DDB(x) x
-#define DEB(x)
-
-static void bdginit(void *);
-static void bdgtakeifaces(void);
-static void flush_table(void);
-static void bdg_promisc_on(void);
-static void parse_bdg_cfg(void);
-
-static int bdg_ipfw = 0 ;
-int do_bridge = 0;
-bdg_hash_table *bdg_table = NULL ;
-
-/*
- * System initialization
- */
-
-SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, bdginit, NULL)
-
-static struct bdg_stats bdg_stats ;
-struct bdg_softc *ifp2sc = NULL ;
-/* XXX make it static of size BDG_MAX_PORTS */
-
-#define        IFP_CHK(ifp, x) \
-       if (ifp2sc[ifp->if_index].magic != 0xDEADBEEF) { x ; }
-
-/*
- * turn off promisc mode, optionally clear the IFF_USED flag.
- * The flag is turned on by parse_bdg_config
- */
-static void
-bdg_promisc_off(int clear_used)
-{
-    struct ifnet *ifp ;
-    ifnet_head_lock_shared();
-    TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
-               if ( (ifp2sc[ifp->if_index].flags & IFF_BDG_PROMISC) ) {
-                       int s, ret ;
-                       s = splimp();
-                       ret = ifnet_set_promiscuous(ifp, 0);
-                       splx(s);
-                       ifp2sc[ifp->if_index].flags &= ~(IFF_BDG_PROMISC|IFF_MUTE) ;
-                       DEB(printf(">> now %s%d promisc OFF if_flags 0x%x bdg_flags 0x%x\n",
-                               ifp->if_name, ifp->if_unit,
-                               ifp->if_flags, ifp2sc[ifp->if_index].flags);)
-               }
-               if (clear_used) {
-                       ifp2sc[ifp->if_index].flags &= ~(IFF_USED) ;
-                       bdg_stats.s[ifp->if_index].name[0] = '\0';
-               }
-    }
-    ifnet_head_done();
-}
-
-/*
- * set promisc mode on the interfaces we use.
- */
-static void
-bdg_promisc_on()
-{
-    struct ifnet *ifp ;
-    int s ;
-
-    ifnet_head_lock_shared();
-    TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
-               if ( !BDG_USED(ifp) )
-                       continue ;
-               if ( 0 == ( ifp->if_flags & IFF_UP) ) {
-                       s = splimp();
-                       if_up(ifp);
-                       splx(s);
-               }
-               if ( !(ifp2sc[ifp->if_index].flags & IFF_BDG_PROMISC) ) {
-                       int ret ;
-                       s = splimp();
-                       ret = ifnet_set_promiscuous(ifp, 1);
-                       splx(s);
-                       ifp2sc[ifp->if_index].flags |= IFF_BDG_PROMISC ;
-                       printf(">> now %s%d promisc ON if_flags 0x%x bdg_flags 0x%x\n",
-                               ifp->if_name, ifp->if_unit,
-                               ifp->if_flags, ifp2sc[ifp->if_index].flags);
-               }
-               if (BDG_MUTED(ifp)) {
-                       printf(">> unmuting %s%d\n", ifp->if_name, ifp->if_unit);
-                       BDG_UNMUTE(ifp) ;
-               }
-    }
-    ifnet_head_done();
-}
-
-static int
-sysctl_bdg(SYSCTL_HANDLER_ARGS)
-{
-    int error, oldval = do_bridge ;
-
-    error = sysctl_handle_int(oidp,
-       oidp->oid_arg1, oidp->oid_arg2, req);
-    DEB( printf("called sysctl for bridge name %s arg2 %d val %d->%d\n",
-       oidp->oid_name, oidp->oid_arg2,
-       oldval, do_bridge); )
-
-    if (bdg_table == NULL)
-       do_bridge = 0 ;
-    if (oldval != do_bridge) {
-       bdg_promisc_off( 1 ); /* reset previously used interfaces */
-       flush_table();
-       if (do_bridge) {
-           parse_bdg_cfg();
-           bdg_promisc_on();
-       }
-    }
-    return error ;
-}
-
-static char bridge_cfg[256] = { "" } ;
-
-/*
- * parse the config string, set IFF_USED, name and cluster_id
- * for all interfaces found.
- */
-static void
-parse_bdg_cfg()
-{
-    char *p, *beg ;
-    int i, l, cluster;
-    struct bdg_softc *b;
-
-    for (p= bridge_cfg; *p ; p++) {
-       /* interface names begin with [a-z]  and continue up to ':' */
-       if (*p < 'a' || *p > 'z')
-           continue ;
-       for ( beg = p ; *p && *p != ':' ; p++ )
-           ;
-       if (*p == 0) /* end of string, ':' not found */
-           return ;
-       l = p - beg ; /* length of name string */
-       p++ ;
-       DEB(printf("-- match beg(%d) <%s> p <%s>\n", l, beg, p);)
-       for (cluster = 0 ; *p && *p >= '0' && *p <= '9' ; p++)
-           cluster = cluster*10 + (*p -'0');
-       /*
-        * now search in bridge strings
-        */
-       for (i=0, b = ifp2sc ; i < if_index ; i++, b++) {
-           char buf[32];
-           struct ifnet *ifp = b->ifp ;
-
-           if (ifp == NULL)
-               continue;
-           sprintf(buf, "%s%d", ifp->if_name, ifp->if_unit);
-           if (!strncmp(beg, buf, l)) { /* XXX not correct for >10 if! */
-               b->cluster_id = htons(cluster) ;
-               b->flags |= IFF_USED ;
-               sprintf(bdg_stats.s[ifp->if_index].name,
-                       "%s%d:%d", ifp->if_name, ifp->if_unit, cluster);
-
-               DEB(printf("--++  found %s\n",
-                   bdg_stats.s[ifp->if_index].name);)
-               break ;
-           }
-       }
-       if (*p == '\0')
-           break ;
-    }
-}
-
-static int
-sysctl_bdg_cfg(SYSCTL_HANDLER_ARGS)
-{
-    int error = 0 ;
-    char oldval[256] ;
-
-    strlcpy(oldval, bridge_cfg, sizeof (oldval));
-
-    error = sysctl_handle_string(oidp,
-           bridge_cfg, oidp->oid_arg2, req);
-    DEB(
-       printf("called sysctl for bridge name %s arg2 %d err %d val %s->%s\n",
-               oidp->oid_name, oidp->oid_arg2,
-               error,
-               oldval, bridge_cfg);
-       )
-    if (strcmp(oldval, bridge_cfg)) {
-       bdg_promisc_off( 1 );  /* reset previously-used interfaces */
-       flush_table();
-       parse_bdg_cfg();        /* and set new ones... */
-       if (do_bridge)
-           bdg_promisc_on();   /* re-enable interfaces */
-    }
-    return error ;
-}
-
-static int
-sysctl_refresh(SYSCTL_HANDLER_ARGS)
-{
-    if (req->newptr)
-           bdgtakeifaces();
-    
-    return 0;
-}
-
-
-SYSCTL_DECL(_net_link_ether);
-SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge_cfg, CTLTYPE_STRING|CTLFLAG_RW,
-           &bridge_cfg, sizeof(bridge_cfg), &sysctl_bdg_cfg, "A",
-           "Bridge configuration");
-
-SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge, CTLTYPE_INT|CTLFLAG_RW,
-           &do_bridge, 0, &sysctl_bdg, "I", "Bridging");
-
-SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw, CTLFLAG_RW,
-           &bdg_ipfw,0,"Pass bridged pkts through firewall");
-
-#define SY(parent, var, comment)                       \
-       static int var ;                                \
-       SYSCTL_INT(parent, OID_AUTO, var, CTLFLAG_RW, &(var), 0, comment);
-
-int bdg_ipfw_drops;
-SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw_drop,
-       CTLFLAG_RW, &bdg_ipfw_drops,0,"");
-
-int bdg_ipfw_colls;
-SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw_collisions,
-       CTLFLAG_RW, &bdg_ipfw_colls,0,"");
-
-SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge_refresh, CTLTYPE_INT|CTLFLAG_WR,
-           NULL, 0, &sysctl_refresh, "I", "iface refresh");
-
-#if 1 /* diagnostic vars */
-
-SY(_net_link_ether, verbose, "Be verbose");
-SY(_net_link_ether, bdg_split_pkts, "Packets split in bdg_forward");
-
-SY(_net_link_ether, bdg_thru, "Packets through bridge");
-
-SY(_net_link_ether, bdg_copied, "Packets copied in bdg_forward");
-
-SY(_net_link_ether, bdg_copy, "Force copy in bdg_forward");
-SY(_net_link_ether, bdg_predict, "Correctly predicted header location");
-
-SY(_net_link_ether, bdg_fw_avg, "Cycle counter avg");
-SY(_net_link_ether, bdg_fw_ticks, "Cycle counter item");
-SY(_net_link_ether, bdg_fw_count, "Cycle counter count");
-#endif
-
-SYSCTL_STRUCT(_net_link_ether, PF_BDG, bdgstats,
-        CTLFLAG_RD, &bdg_stats , bdg_stats, "bridge statistics");
-
-static int bdg_loops ;
-
-/*
- * completely flush the bridge table.
- */
-static void
-flush_table()
-{   
-    int s,i;
-
-    if (bdg_table == NULL)
-       return ;
-    s = splimp();
-    for (i=0; i< HASH_SIZE; i++)
-        bdg_table[i].name= NULL; /* clear table */
-    splx(s);
-}
-
-/*
- * called periodically to flush entries etc.
- */
-static void
-bdg_timeout(void *dummy)
-{
-    static int slowtimer = 0 ;
-    
-    if (bdg_inited == 0) {
-        bdg_init2();
-    } else if (do_bridge) {
-        static int age_index = 0 ; /* index of table position to age */
-        int l = age_index + HASH_SIZE/4 ;
-        /*
-         * age entries in the forwarding table.
-         */
-        if (l > HASH_SIZE)
-            l = HASH_SIZE ;
-        for (; age_index < l ; age_index++)
-            if (bdg_table[age_index].used)
-                bdg_table[age_index].used = 0 ;
-            else if (bdg_table[age_index].name) {
-                /* printf("xx flushing stale entry %d\n", age_index); */
-                bdg_table[age_index].name = NULL ;
-            }
-        if (age_index >= HASH_SIZE)
-            age_index = 0 ;
-        
-        if (--slowtimer <= 0 ) {
-            slowtimer = 5 ;
-            
-            bdg_promisc_on() ; /* we just need unmute, really */
-            bdg_loops = 0 ;
-        }
-    }
-    timeout(bdg_timeout, (void *)0, 2*hz );
-}
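
Timing note, not in the source: the callout fires every 2*hz and ages HASH_SIZE/4 slots per firing, so one pass over the 8192-slot table takes four firings (about 8 seconds); an address is demoted on one visit and evicted on the next, i.e. forgotten roughly 8 to 16 seconds after its last packet. A toy model of that second-chance aging:

    /* Toy model of the two-sweep aging in bdg_timeout(): an entry
     * survives the sweep that clears its used bit and is evicted on
     * the following sweep. */
    #include <stdio.h>

    #define SLOTS 8

    struct entry { int present, used; };

    static void
    age_all(struct entry *t)        /* one full sweep over the table */
    {
        for (int i = 0; i < SLOTS; i++) {
            if (t[i].used)
                t[i].used = 0;      /* first visit: demote */
            else if (t[i].present)
                t[i].present = 0;   /* second visit: evict */
        }
    }

    int
    main(void)
    {
        struct entry t[SLOTS] = { [3] = { .present = 1, .used = 1 } };
        for (int sweep = 1; sweep <= 2; sweep++) {
            age_all(t);
            printf("after sweep %d: present=%d used=%d\n",
                sweep, t[3].present, t[3].used);
        }
        return 0;   /* entry survives sweep 1, evicted on sweep 2 */
    }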
-
-/*
- * local MAC addresses are held in a small array. This makes comparisons
- * much faster.
- */
-bdg_addr bdg_addresses[BDG_MAX_PORTS];
-int bdg_ports ;
-
-/*
- * initialization of bridge code. This needs to be done after all
- * interfaces have been configured.
- */
-
-static int bdg_inited = 0;
-
-static void
-bdg_init2(void)
-{
-    if (bdg_inited != 0)
-        return;
-    
-    if (bdg_table == NULL) {
-        bdg_table = (struct hash_table *)
-            _MALLOC(HASH_SIZE * sizeof(struct hash_table),
-                    M_IFADDR, M_WAITOK);
-        if (bdg_table == NULL)
-            return;
-
-        flush_table();
-    }
-
-    if (ifp2sc == NULL) {
-        ifp2sc = _MALLOC(BDG_MAX_PORTS * sizeof(struct bdg_softc),
-                         M_IFADDR, M_WAITOK );
-        if (ifp2sc == NULL)
-            return;
-        
-        bzero(ifp2sc, BDG_MAX_PORTS * sizeof(struct bdg_softc) );
-        bdgtakeifaces();
-    }
-    
-    bdg_inited = 1;
-}
-
-static void
-bdginit(void *dummy)
-{
-    /* Initialize first what can't fail */
-    bzero(&bdg_stats, sizeof(bdg_stats) );
-    do_bridge=0;
-    
-    /* Attempt to initialize the rest and start the timer */
-    bdg_timeout(0);
-}
-    
-void
-bdgtakeifaces(void)
-{
-    int i ;
-    struct ifnet *ifp;
-    bdg_addr *p = bdg_addresses ;
-    struct bdg_softc *bp;
-
-    bdg_ports = 0 ;
-    *bridge_cfg = '\0';
-
-    printf("BRIDGE 010131, have %d interfaces\n", if_index);
-    ifnet_head_lock_shared();
-    for (i = 0 , ifp = ifnet_head.tqh_first ; i < if_index ;
-               i++, ifp = TAILQ_NEXT(ifp, if_link) )
-               if (ifp->if_type == IFT_ETHER) { /* ethernet ? */
-                       ifnet_lladdr_copy_bytes(ifp, p->etheraddr, ETHER_ADDR_LEN);
-                       bp = &ifp2sc[ifp->if_index] ;
-                       sprintf(bridge_cfg + strlen(bridge_cfg),
-                       "%s%d:1,", ifp->if_name, ifp->if_unit);
-                       printf("-- index %d %s type %d phy %d addrl %d addr %6D\n",
-                               ifp->if_index,
-                               bdg_stats.s[ifp->if_index].name,
-                               (int)ifp->if_type, (int) ifp->if_physical,
-                               (int)ifp->if_addrlen,
-                               p->etheraddr, "." );
-                       p++ ;
-                       bp->ifp = ifp ;
-                       bp->flags = IFF_USED ;
-                       bp->cluster_id = htons(1) ;
-                       bp->magic = 0xDEADBEEF ;
-       
-                       sprintf(bdg_stats.s[ifp->if_index].name,
-                       "%s%d:%d", ifp->if_name, ifp->if_unit,
-                       ntohs(bp->cluster_id));
-                       bdg_ports ++ ;
-               }
-       ifnet_head_done();
-}
-
-/*
- * bridge_in() is invoked to perform bridging decision on input packets.
- *
- * On Input:
- *   eh                Ethernet header of the incoming packet.
- *
- * On Return: destination of packet, one of
- *   BDG_BCAST broadcast
- *   BDG_MCAST  multicast
- *   BDG_LOCAL  is only for a local address (do not forward)
- *   BDG_DROP   drop the packet
- *   ifp       ifp of the destination interface.
- *
- * Forwarding is not done directly to give a chance to some drivers
- * to fetch more of the packet, or simply drop it completely.
- */
-
-struct ifnet *
-bridge_in(struct ifnet *ifp, struct ether_header *eh)
-{
-    int index;
-    struct ifnet *dst , *old ;
-    int dropit = BDG_MUTED(ifp) ;
-
-    /*
-     * hash the source address
-     */
-    index= HASH_FN(eh->ether_shost);
-    bdg_table[index].used = 1 ;
-    old = bdg_table[index].name ;
-    if ( old ) { /* the entry is valid. */
-       IFP_CHK(old, printf("bridge_in-- reading table\n") );
-
-        if (!BDG_MATCH( eh->ether_shost, bdg_table[index].etheraddr) ) {
-           bdg_ipfw_colls++ ;
-           bdg_table[index].name = NULL ;
-        } else if (old != ifp) {
-           /*
-            * found a loop. Either a machine has moved, or there
-            * is a misconfiguration/reconfiguration of the network.
-            * First, do not forward this packet!
-            * Record the relocation anyways; then, if loops persist,
-            * suspect a reconfiguration and disable forwarding
-            * from the old interface.
-            */
-           bdg_table[index].name = ifp ; /* relocate address */
-           printf("-- loop (%d) %6D to %s%d from %s%d (%s)\n",
-                       bdg_loops, eh->ether_shost, ".",
-                       ifp->if_name, ifp->if_unit,
-                       old->if_name, old->if_unit,
-                       BDG_MUTED(old) ? "muted":"active");
-           dropit = 1 ;
-           if ( !BDG_MUTED(old) ) {
-               if (++bdg_loops > 10)
-                   BDG_MUTE(old) ;
-           }
-        }
-    }
-
-    /*
-     * now write the source address into the table
-     */
-    if (bdg_table[index].name == NULL) {
-       DEB(printf("new addr %6D at %d for %s%d\n",
-           eh->ether_shost, ".", index, ifp->if_name, ifp->if_unit);)
-       bcopy(eh->ether_shost, bdg_table[index].etheraddr, 6);
-       bdg_table[index].name = ifp ;
-    }
-    dst = bridge_dst_lookup(eh);
-    /* Return values:
-     *   BDG_BCAST, BDG_MCAST, BDG_LOCAL, BDG_UNKNOWN, BDG_DROP, ifp.
-     * For muted interfaces, the first 3 are changed into BDG_LOCAL,
-     * and the others into BDG_DROP. Also, for incoming packets, ifp is
-     * changed to BDG_DROP when ifp == src. These changes are not needed
-     * for outgoing packets from ether_output().
-     */
-    BDG_STAT(ifp, BDG_IN);
-    switch ((int)dst) {
-    case (int)BDG_BCAST:
-    case (int)BDG_MCAST:
-    case (int)BDG_LOCAL:
-    case (int)BDG_UNKNOWN:
-    case (int)BDG_DROP:
-       BDG_STAT(ifp, dst);
-       break ;
-    default :
-       if (dst == ifp || dropit )
-           BDG_STAT(ifp, BDG_DROP);
-       else
-           BDG_STAT(ifp, BDG_FORWARD);
-       break ;
-    }
-
-    if ( dropit ) {
-       if (dst == BDG_BCAST || dst == BDG_MCAST || dst == BDG_LOCAL)
-           return BDG_LOCAL ;
-       else
-           return BDG_DROP ;
-    } else {
-       return (dst == ifp ? BDG_DROP : dst ) ;
-    }
-}
-
-/*
- * Forward to dst, excluding src port and muted interfaces.
- * If src == NULL, the pkt comes from ether_output, and dst is the real
- * interface the packet is originally sent to. In this case we must forward
- * it to the whole cluster. We never call bdg_forward ether_output on
- * interfaces which are not part of a cluster.
- *
- * The packet is freed if possible (i.e. surely not of interest for
- * the upper layer), otherwise a copy is left for use by the caller
- * (pointer in m0).
- *
- * It would be more efficient to make bdg_forward() always consume
- * the packet, leaving the caller the task of checking whether it needs
- * a copy and making one if so. As it is now, bdg_forward() can sometimes
- * make a copy where none is necessary.
- *
- * XXX be careful about eh, it can be a pointer into *m
- */
-struct mbuf *
-bdg_forward(struct mbuf *m0, struct ether_header *const eh, struct ifnet *dst)
-{
-    struct ifnet *src = m0->m_pkthdr.rcvif; /* could be NULL in output */
-    struct ifnet *ifp, *last = NULL ;
-    int s ;
-    int shared = bdg_copy ; /* someone else is using the mbuf */
-    int once = 0;      /* loop only once */
-    struct ifnet *real_dst = dst ; /* real dst from ether_output */
-#ifdef IPFIREWALL
-    struct ip_fw_chain *rule = NULL ; /* did we match a firewall rule ? */
-#endif
-
-    /*
-     * XXX eh is usually a pointer within the mbuf (some ethernet drivers
-     * do that), so we better copy it before doing anything with the mbuf,
-     * or we might corrupt the header.
-     */
-    struct ether_header save_eh = *eh ;
-
-#if defined(IPFIREWALL) && defined(DUMMYNET)
-    if (m0->m_type == MT_DUMMYNET) {
-       /* extract info from dummynet header */
-       rule = (struct ip_fw_chain *)(m0->m_data) ;
-       m0 = m0->m_next ;
-       src = m0->m_pkthdr.rcvif;
-       shared = 0 ; /* For sure this is our own mbuf. */
-    } else
-#endif
-    bdg_thru++; /* only count once */
-
-    if (src == NULL) /* packet from ether_output */
-               dst = bridge_dst_lookup(eh);
-    if (dst == BDG_DROP) { /* this should not happen */
-               printf("xx bdg_forward for BDG_DROP\n");
-               m_freem(m0);
-               return NULL;
-    }
-    if (dst == BDG_LOCAL) { /* this should not happen as well */
-               printf("xx ouch, bdg_forward for local pkt\n");
-               return m0;
-    }
-    if (dst == BDG_BCAST || dst == BDG_MCAST || dst == BDG_UNKNOWN) {
-               ifp = ifnet_head.tqh_first ; /* scan all ports */
-               once = 0 ;
-               if (dst != BDG_UNKNOWN) /* need a copy for the local stack */
-                       shared = 1 ;
-    } else {
-               ifp = dst ;
-               once = 1 ;
-    }
-    if ( (u_int)(ifp) <= (u_int)BDG_FORWARD )
-               panic("bdg_forward: bad dst");
-
-#ifdef IPFIREWALL
-    /*
-     * Do filtering in a very similar way to what is done in ip_output.
-     * Only if firewall is loaded, enabled, and the packet is not
-     * from ether_output() (src==NULL, or we would filter it twice).
-     * Additional restrictions may apply e.g. non-IP, short packets,
-     * and pkts already gone through a pipe.
-     */
-    if (ip_fw_chk_ptr && bdg_ipfw != 0 && src != NULL) {
-       struct ip *ip ;
-       int i;
-
-       if (rule != NULL) /* dummynet packet, already partially processed */
-           goto forward; /* HACK! I should obey the fw_one_pass */
-       if (ntohs(save_eh.ether_type) != ETHERTYPE_IP)
-           goto forward ; /* not an IP packet, ipfw is not appropriate */
-       if (m0->m_pkthdr.len < sizeof(struct ip) )
-           goto forward ; /* header too short for an IP pkt, cannot filter */
-       /*
-        * We need some amount of data to be contiguous and, in case others
-        * need the packet (shared==1), it had better be in the first mbuf.
-        */
-       i = min(m0->m_pkthdr.len, max_protohdr) ;
-       if ( shared || m0->m_len < i) {
-           m0 = m_pullup(m0, i) ;
-           if (m0 == NULL) {
-               printf("-- bdg: pullup failed.\n") ;
-               return NULL ;
-           }
-       }
-
-       /*
-        * before calling the firewall, swap fields the same as IP does.
-        * here we assume the pkt is an IP one and the header is contiguous
-        */
-       ip = mtod(m0, struct ip *);
-       NTOHS(ip->ip_len);
-       NTOHS(ip->ip_off);
-
-       /*
-        * The third parameter to the firewall code is the dst. interface.
-        * Since we apply checks only on input pkts we use NULL.
-        * The firewall knows this is a bridged packet as the cookie ptr
-        * is NULL.
-        */
-       i = (*ip_fw_chk_ptr)(&ip, 0, NULL, NULL /* cookie */, &m0, &rule, NULL);
-       if ( (i & IP_FW_PORT_DENY_FLAG) || m0 == NULL) /* drop */
-           return m0 ;
-       /*
-        * If we get here, the firewall has passed the pkt, but the mbuf
-        * pointer might have changed. Restore ip and the fields NTOHS()'d.
-        */
-       ip = mtod(m0, struct ip *);
-       HTONS(ip->ip_len);
-       HTONS(ip->ip_off);
-
-       if (i == 0) /* a PASS rule.  */
-           goto forward ;
-#ifdef DUMMYNET
-       if (i & IP_FW_PORT_DYNT_FLAG) {
-           /*
-            * Pass the pkt to dummynet, which consumes it.
-            * If shared, make a copy and keep the original.
-            * Need to prepend the ethernet header, optimize the common
-            * case of eh pointing already into the original mbuf.
-            */
-           struct mbuf *m ;
-           if (shared) {
-               m = m_copypacket(m0, M_DONTWAIT);
-               if (m == NULL) {
-                   printf("bdg_fwd: copy(1) failed\n");
-                   return m0;
-               }
-           } else {
-               m = m0 ; /* pass the original to dummynet */
-               m0 = NULL ; /* and nothing back to the caller */
-           }
-           if ( (void *)(eh + 1) == (void *)m->m_data) {
-               m->m_data -= ETHER_HDR_LEN ;
-               m->m_len += ETHER_HDR_LEN ;
-               m->m_pkthdr.len += ETHER_HDR_LEN ;
-               bdg_predict++;
-           } else {
-               M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT);
-               if (!m && verbose) printf("M_PREPEND failed\n");
-               if (m == NULL) /* nope... */
-                   return m0 ;
-               bcopy(&save_eh, mtod(m, struct ether_header *), ETHER_HDR_LEN);
-           }
-           dummynet_io((i & 0xffff),DN_TO_BDG_FWD,m,real_dst,NULL,0,rule,0);
-           return m0 ;
-       }
-#endif
-       /*
-        * XXX add divert/forward actions...
-        */
-       /* if none of the above matches, we have to drop the pkt */
-       bdg_ipfw_drops++ ;
-       printf("bdg_forward: No rules match, so dropping packet!\n");
-       return m0 ;
-    }
-forward:
-#endif /* IPFIREWALL */
-    /*
-     * Again, bring up the headers in case of shared bufs to avoid
-     * corruptions in the future.
-     */
-    if ( shared ) {
-        int i = min(m0->m_pkthdr.len, max_protohdr) ;
-
-       m0 = m_pullup(m0, i) ;
-       if (m0 == NULL) {
-           printf("-- bdg: pullup2 failed.\n") ;
-           return NULL ;
-       }
-    }
-    /* now real_dst is used to determine the cluster where to forward */
-    if (src != NULL) /* pkt comes from ether_input */
-       real_dst = src ;
-    for (;;) {
-       if (last) { /* need to forward packet leftover from previous loop */
-           struct mbuf *m ;
-           if (shared == 0 && once ) { /* no need to copy */
-               m = m0 ;
-               m0 = NULL ; /* original is gone */
-           } else {
-               m = m_copypacket(m0, M_DONTWAIT);
-               if (m == NULL) {
-                   printf("bdg_forward: sorry, m_copypacket failed!\n");
-                   return m0 ; /* the original is still there... */
-               }
-           }
-           /*
-            * Add header (optimized for the common case of eh pointing
-            * already into the mbuf) and execute last part of ether_output:
-            * queue pkt and start output if interface not yet active.
-            */
-           if ( (void *)(eh + 1) == (void *)m->m_data) {
-               m->m_data -= ETHER_HDR_LEN ;
-               m->m_len += ETHER_HDR_LEN ;
-               m->m_pkthdr.len += ETHER_HDR_LEN ;
-               bdg_predict++;
-           } else {
-               M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT);
-               if (!m && verbose) printf("M_PREPEND failed\n");
-               if (m == NULL)
-                   return m0;
-               bcopy(&save_eh, mtod(m, struct ether_header *), ETHER_HDR_LEN);
-           }
-           s = splimp();
-           if (IF_QFULL(&last->if_snd)) {
-               IF_DROP(&last->if_snd);
-#if 0
-               BDG_MUTE(last); /* should I also mute ? */
-#endif
-               splx(s);
-               m_freem(m); /* consume the pkt anyways */
-           } else {
-               last->if_obytes += m->m_pkthdr.len ;
-               if (m->m_flags & M_MCAST)
-                   last->if_omcasts++;
-               if (m->m_pkthdr.len != m->m_len) /* this pkt is on >1 bufs */
-                   bdg_split_pkts++;
-
-               IF_ENQUEUE(&last->if_snd, m);
-               if ((last->if_flags & IFF_OACTIVE) == 0)
-                   (*last->if_start)(last);
-               splx(s);
-           }
-           BDG_STAT(last, BDG_OUT);
-           last = NULL ;
-           if (once)
-               break ;
-       }
-       if (ifp == NULL)
-           break ;
-       /*
-        * If the interface is used for bridging, not muted, not full,
-        * up and running, is not the source interface, and belongs to
-        * the same cluster as the 'real_dst', then send here.
-        */
-       if ( BDG_USED(ifp) && !BDG_MUTED(ifp) && !IF_QFULL(&ifp->if_snd)  &&
-            (ifp->if_flags & (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING) &&
-            ifp != src && BDG_SAMECLUSTER(ifp, real_dst) )
-           last = ifp ;
-       ifp = TAILQ_NEXT(ifp, if_link) ;
-       if (ifp == NULL)
-           once = 1 ;
-    }
-    DEB(bdg_fw_ticks += (u_int32_t)(rdtsc() - ticks) ; bdg_fw_count++ ;
-       if (bdg_fw_count != 0) bdg_fw_avg = bdg_fw_ticks/bdg_fw_count; )
-    return m0 ;
-}
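
One pattern worth calling out (editorial aside, not in the diff): both copies of the header-prepend logic test (void *)(eh + 1) == m->m_data. When the saved Ethernet header still sits immediately before the payload in the same buffer, the code rewinds m_data instead of allocating via M_PREPEND; that fast path is what the bdg_predict counter measures. A flat-buffer illustration of the test:

    /* Illustration of the header-reclaim test in bdg_forward():
     * header directly preceding the payload means "prepending" is
     * just rewinding the data pointer. */
    #include <stdio.h>

    #define ETHER_HDR_LEN 14

    int
    main(void)
    {
        unsigned char frame[64];
        unsigned char *eh = frame;                   /* header at front */
        unsigned char *data = frame + ETHER_HDR_LEN; /* payload follows */

        if ((void *)(eh + ETHER_HDR_LEN) == (void *)data) {
            data -= ETHER_HDR_LEN;                   /* reclaim in place */
            printf("reclaimed header, data=%p eh=%p\n",
                (void *)data, (void *)eh);
        } else {
            /* header elsewhere: would need a prepend plus a copy from
             * the saved header (save_eh in the kernel code above) */
            printf("would prepend %d bytes\n", ETHER_HDR_LEN);
        }
        return 0;
    }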
diff --git a/bsd/net/bridge.h b/bsd/net/bridge.h
deleted file mode 100644 (file)
index faeff42..0000000
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * Copyright (c) 1998 Luigi Rizzo
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- */
-#ifndef _NET_BRIDGE_H_
-#define _NET_BRIDGE_H_
-#include <sys/appleapiopts.h>
-
-#warning This is not used by Darwin, do not include
-
-extern int do_bridge;
-/*
- * the hash table for bridge
- */
-typedef struct hash_table {
-    struct ifnet *name ;
-    unsigned char etheraddr[6] ;
-    unsigned short used ;
-} bdg_hash_table ;
-
-extern bdg_hash_table *bdg_table ;
-
-/*
- * We need additional info for the bridge. The ifp2sc[] array
- * provides a pointer to this struct using the if_index.
- * bdg_softc has a backpointer to the struct ifnet, the bridge
- * flags, and a cluster id (bridging occurs only between ports
- * of the same cluster).
- */
-struct bdg_softc {
-    struct ifnet *ifp ;
-    /* also ((struct arpcom *)ifp)->ac_enaddr is the eth. addr */
-    int flags ;
-#define IFF_BDG_PROMISC 0x0001  /* set promisc mode on this if.  */
-#define IFF_MUTE        0x0002  /* mute this if for bridging.   */
-#define IFF_USED        0x0004  /* use this if for bridging.    */
-    short cluster_id ; /* in network format */
-    uint32_t magic;
-} ;
-
-extern struct bdg_softc *ifp2sc;
-
-#define BDG_USED(ifp) (ifp2sc[ifp->if_index].flags & IFF_USED)
-#define BDG_MUTED(ifp) (ifp2sc[ifp->if_index].flags & IFF_MUTE)
-#define BDG_MUTE(ifp) ifp2sc[ifp->if_index].flags |= IFF_MUTE
-#define BDG_UNMUTE(ifp) ifp2sc[ifp->if_index].flags &= ~IFF_MUTE
-#define BDG_CLUSTER(ifp) (ifp2sc[ifp->if_index].cluster_id)
-
-#define BDG_SAMECLUSTER(ifp,src) \
-       (src == NULL || BDG_CLUSTER(ifp) == BDG_CLUSTER(src) )
-
-
-#define BDG_MAX_PORTS 128
-typedef struct _bdg_addr {
-    unsigned char etheraddr[6] ;
-    short cluster_id ;
-} bdg_addr ;
-extern bdg_addr bdg_addresses[BDG_MAX_PORTS];
-extern int bdg_ports ;
-
-/*
- * Out of the 6 bytes, the last ones are the most "variable". Since
- * we are on a little-endian machine, we have to do some gimmickry...
- */
-#define HASH_SIZE 8192 /* must be a power of 2 */
-#define HASH_FN(addr)   (      \
-       ntohs( ((short *)addr)[1] ^ ((short *)addr)[2] ) & (HASH_SIZE -1))
-
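
A worked example of HASH_FN (editorial sketch; it mirrors the macro above in a hypothetical userland program, with the same little-endian assumption the comment notes):

    /* Mirrors the HASH_FN macro above; shorts [1] and [2] cover the
     * last four, most variable bytes of the address. */
    #include <stdio.h>
    #include <arpa/inet.h>          /* ntohs() */

    #define HASH_SIZE 8192          /* must be a power of 2 */
    #define HASH_FN(addr) ( \
        ntohs(((unsigned short *)(addr))[1] ^ \
              ((unsigned short *)(addr))[2]) & (HASH_SIZE - 1))

    int
    main(void)
    {
        unsigned short mac3[3] = { 0 };     /* short-aligned backing */
        unsigned char *mac = (unsigned char *)mac3;
        const unsigned char addr[6] = { 0x00, 0x50, 0x56, 0xc0, 0x00, 0x08 };

        for (int i = 0; i < 6; i++)
            mac[i] = addr[i];
        printf("bucket = %d\n", HASH_FN(mac)); /* 5832 on little-endian */
        return 0;
    }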
-#define        IFF_MUTE        IFF_LINK2       /* will need a separate flag... */
-
-struct ifnet *bridge_in(struct ifnet *ifp, struct ether_header *eh);
-/* bdg_forward frees the mbuf if necessary, returning null */
-struct mbuf *bdg_forward(struct mbuf *m0, struct ether_header *eh, struct ifnet *dst);
-
-#ifdef __i386__
-#define BDG_MATCH(a,b) ( \
-    ((unsigned short *)(a))[2] == ((unsigned short *)(b))[2] && \
-    *((unsigned int *)(a)) == *((unsigned int *)(b)) )
-#define IS_ETHER_BROADCAST(a) ( \
-       *((unsigned int *)(a)) == 0xffffffff && \
-       ((unsigned short *)(a))[2] == 0xffff )
-#else
-#warning... must complete these for the alpha etc.
-#define BDG_MATCH(a,b) (!bcmp(a, b, ETHER_ADDR_LEN) )
-#endif
-/*
- * The following constants are not legal ifnet pointers, and are used
- * as return values from the classifier, bridge_dst_lookup()
- * The same values are used as index in the statistics arrays,
- * with BDG_FORWARD replacing specifically forwarded packets.
- */
-#define BDG_BCAST      ( (struct ifnet *)1 )
-#define BDG_MCAST      ( (struct ifnet *)2 )
-#define BDG_LOCAL      ( (struct ifnet *)3 )
-#define BDG_DROP       ( (struct ifnet *)4 )
-#define BDG_UNKNOWN    ( (struct ifnet *)5 )
-#define BDG_IN         ( (struct ifnet *)7 )
-#define BDG_OUT                ( (struct ifnet *)8 )
-#define BDG_FORWARD    ( (struct ifnet *)9 )
-
-#define PF_BDG 3 /* XXX superhack */
-/*
- * statistics, passed up with sysctl interface and ns -p bdg
- */
-
-#define STAT_MAX (int)BDG_FORWARD
-struct bdg_port_stat {
-    char name[16];
-    uint32_t collisions;
-    uint32_t p_in[STAT_MAX+1];
-} ;
-
-struct bdg_stats {
-    struct bdg_port_stat s[16];
-} ;
-
-
-#define BDG_STAT(ifp, type) bdg_stats.s[ifp->if_index].p_in[(int)type]++ 
-#ifdef KERNEL
-/*
- * Find the right pkt destination:
- *     BDG_BCAST       is a broadcast
- *     BDG_MCAST       is a multicast
- *     BDG_LOCAL       is for a local address
- *     BDG_DROP        must be dropped
- *     other           ifp of the dest. interface (incl.self)
- *
- * We assume this is only called for interfaces for which bridging
- * is enabled, i.e. BDG_USED(ifp) is true.
- */
-static __inline
-struct ifnet *
-bridge_dst_lookup(struct ether_header *eh)
-{
-    struct ifnet *dst ;
-    int index ;
-    bdg_addr *p ;
-
-    if (IS_ETHER_BROADCAST(eh->ether_dhost))
-       return BDG_BCAST ;
-    if (eh->ether_dhost[0] & 1)
-       return BDG_MCAST ;
-    /*
-     * Lookup local addresses in case one matches.
-     */
-    for (index = bdg_ports, p = bdg_addresses ; index ; index--, p++ )
-       if (BDG_MATCH(p->etheraddr, eh->ether_dhost) )
-           return BDG_LOCAL ;
-    /*
-     * Look for a possible destination in table
-     */
-    index= HASH_FN( eh->ether_dhost );
-    dst = bdg_table[index].name;
-    if ( dst && BDG_MATCH( bdg_table[index].etheraddr, eh->ether_dhost) )
-       return dst ;
-    else
-       return BDG_UNKNOWN ;
-}
-
-#endif /* KERNEL */
-
-#endif /* _NET_BRIDGE_H_ */
diff --git a/bsd/net/bridgestp.c b/bsd/net/bridgestp.c
new file mode 100644 (file)
index 0000000..1c89582
--- /dev/null
@@ -0,0 +1,1153 @@
+/*
+ * Copyright (c) 2007-2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*     $apfw: Revision 1.2  2007/05/17 03:38:46  rnewberry Exp $       */
+/*     $NetBSD: bridgestp.c,v 1.10 2006/11/16 01:33:40 christos Exp $  */
+
+/*
+ * Copyright (c) 2000 Jason L. Wright (jason@thought.net)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by Jason L. Wright
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * OpenBSD: bridgestp.c,v 1.5 2001/03/22 03:48:29 jason Exp
+ */
+
+/*
+ * Implementation of the spanning tree protocol as defined in
+ * ISO/IEC Final DIS 15802-3 (IEEE P802.1D/D17), May 25, 1998.
+ * (In English: IEEE 802.1D, Draft 17, 1998)
+ */
+
+
+#include <sys/cdefs.h>
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/kernel.h>
+#include <sys/callout.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/if_types.h>
+#include <net/if_llc.h>
+
+#include <net/if_ether.h>
+#include <net/if_bridgevar.h>
+#include <net/if_media.h>
+
+#include <net/kpi_interface.h>
+
+/* BPDU message types */
+#define        BSTP_MSGTYPE_CFG        0x00            /* Configuration */
+#define        BSTP_MSGTYPE_TCN        0x80            /* Topology chg notification */
+
+/* BPDU flags */
+#define        BSTP_FLAG_TC            0x01            /* Topology change */
+#define        BSTP_FLAG_TCA           0x80            /* Topology change ack */
+
+#define        BSTP_MESSAGE_AGE_INCR   (1 * 256)       /* in 256ths of a second */
+#define        BSTP_TICK_VAL           (1 * 256)       /* in 256ths of a second */
+
+/*
+ * Because BPDU's do not make nicely aligned structures, two different
+ * declarations are used: bstp_?bpdu (wire representation, packed) and
+ * bstp_*_unit (internal, nicely aligned version).
+ */
+
+/* configuration bridge protocol data unit */
+struct bstp_cbpdu {
+       uint8_t         cbu_dsap;               /* LLC: destination sap */
+       uint8_t         cbu_ssap;               /* LLC: source sap */
+       uint8_t         cbu_ctl;                /* LLC: control */
+       uint16_t        cbu_protoid;            /* protocol id */
+       uint8_t         cbu_protover;           /* protocol version */
+       uint8_t         cbu_bpdutype;           /* message type */
+       uint8_t         cbu_flags;              /* flags (below) */
+
+       /* root id */
+       uint16_t        cbu_rootpri;            /* root priority */
+       uint8_t cbu_rootaddr[6];        /* root address */
+
+       uint32_t        cbu_rootpathcost;       /* root path cost */
+
+       /* bridge id */
+       uint16_t        cbu_bridgepri;          /* bridge priority */
+       uint8_t         cbu_bridgeaddr[6];      /* bridge address */
+
+       uint16_t        cbu_portid;             /* port id */
+       uint16_t        cbu_messageage;         /* current message age */
+       uint16_t        cbu_maxage;             /* maximum age */
+       uint16_t        cbu_hellotime;          /* hello time */
+       uint16_t        cbu_forwarddelay;       /* forwarding delay */
+} __attribute__((__packed__));
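
A sanity check on the layout (sketch, not part of the file): with the packed attribute the struct is 38 bytes, i.e. the 35-byte 802.1D configuration BPDU plus the 3 LLC octets, and 38 is also the value bstp_send_config_bpdu() below stores in the 802.3 length field. A hypothetical C11 assertion pinning that down:

    /* Hypothetical compile-time check of the packed BPDU size:
     * 3 LLC bytes + 2 protoid + 3 version/type/flags + (2+6) root id
     * + 4 cost + (2+6) bridge id + 2 port id + 4x2 timers = 38. */
    #include <stdint.h>

    struct bstp_cbpdu_chk {
        uint8_t  cbu_dsap, cbu_ssap, cbu_ctl;
        uint16_t cbu_protoid;
        uint8_t  cbu_protover, cbu_bpdutype, cbu_flags;
        uint16_t cbu_rootpri;
        uint8_t  cbu_rootaddr[6];
        uint32_t cbu_rootpathcost;
        uint16_t cbu_bridgepri;
        uint8_t  cbu_bridgeaddr[6];
        uint16_t cbu_portid, cbu_messageage, cbu_maxage,
                 cbu_hellotime, cbu_forwarddelay;
    } __attribute__((__packed__));

    /* C11 form; older compilers can use a negative-array-size trick. */
    _Static_assert(sizeof(struct bstp_cbpdu_chk) == 38,
        "config BPDU must be 38 bytes on the wire");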
+
+/* topology change notification bridge protocol data unit */
+struct bstp_tbpdu {
+       uint8_t         tbu_dsap;               /* LLC: destination sap */
+       uint8_t         tbu_ssap;               /* LLC: source sap */
+       uint8_t         tbu_ctl;                /* LLC: control */
+       uint16_t        tbu_protoid;            /* protocol id */
+       uint8_t         tbu_protover;           /* protocol version */
+       uint8_t         tbu_bpdutype;           /* message type */
+} __attribute__((__packed__));
+
+const uint8_t bstp_etheraddr[] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
+
+void   bstp_initialize_port(struct bridge_softc *, struct bridge_iflist *);
+void   bstp_ifupdstatus(struct bridge_softc *, struct bridge_iflist *);
+void   bstp_enable_port(struct bridge_softc *, struct bridge_iflist *);
+void   bstp_disable_port(struct bridge_softc *, struct bridge_iflist *);
+void   bstp_enable_change_detection(struct bridge_iflist *);
+void   bstp_disable_change_detection(struct bridge_iflist *);
+int    bstp_root_bridge(struct bridge_softc *sc);
+int    bstp_supersedes_port_info(struct bridge_softc *,
+           struct bridge_iflist *, struct bstp_config_unit *);
+int    bstp_designated_port(struct bridge_softc *, struct bridge_iflist *);
+int    bstp_designated_for_some_port(struct bridge_softc *);
+void   bstp_transmit_config(struct bridge_softc *, struct bridge_iflist *);
+void   bstp_transmit_tcn(struct bridge_softc *);
+void   bstp_received_config_bpdu(struct bridge_softc *,
+           struct bridge_iflist *, struct bstp_config_unit *);
+void   bstp_received_tcn_bpdu(struct bridge_softc *, struct bridge_iflist *,
+           struct bstp_tcn_unit *);
+void   bstp_record_config_information(struct bridge_softc *,
+           struct bridge_iflist *, struct bstp_config_unit *);
+void   bstp_record_config_timeout_values(struct bridge_softc *,
+           struct bstp_config_unit *);
+void   bstp_config_bpdu_generation(struct bridge_softc *);
+void   bstp_send_config_bpdu(struct bridge_softc *, struct bridge_iflist *,
+           struct bstp_config_unit *);
+void   bstp_configuration_update(struct bridge_softc *);
+void   bstp_root_selection(struct bridge_softc *);
+void   bstp_designated_port_selection(struct bridge_softc *);
+void   bstp_become_designated_port(struct bridge_softc *,
+           struct bridge_iflist *);
+void   bstp_port_state_selection(struct bridge_softc *);
+void   bstp_make_forwarding(struct bridge_softc *, struct bridge_iflist *);
+void   bstp_make_blocking(struct bridge_softc *, struct bridge_iflist *);
+void   bstp_set_port_state(struct bridge_iflist *, uint8_t);
+void   bstp_set_bridge_priority(struct bridge_softc *, uint64_t);
+void   bstp_set_port_priority(struct bridge_softc *, struct bridge_iflist *,
+           uint16_t);
+void   bstp_set_path_cost(struct bridge_softc *, struct bridge_iflist *,
+           uint32_t);
+void   bstp_topology_change_detection(struct bridge_softc *);
+void   bstp_topology_change_acknowledged(struct bridge_softc *);
+void   bstp_acknowledge_topology_change(struct bridge_softc *,
+           struct bridge_iflist *);
+
+void   bstp_tick(void *);
+void   bstp_timer_start(struct bridge_timer *, uint16_t);
+void   bstp_timer_stop(struct bridge_timer *);
+int    bstp_timer_expired(struct bridge_timer *, uint16_t);
+
+void   bstp_hold_timer_expiry(struct bridge_softc *, struct bridge_iflist *);
+void   bstp_message_age_timer_expiry(struct bridge_softc *,
+           struct bridge_iflist *);
+void   bstp_forward_delay_timer_expiry(struct bridge_softc *,
+           struct bridge_iflist *);
+void   bstp_topology_change_timer_expiry(struct bridge_softc *);
+void   bstp_tcn_timer_expiry(struct bridge_softc *);
+void   bstp_hello_timer_expiry(struct bridge_softc *);
+
+void
+bstp_transmit_config(struct bridge_softc *sc, struct bridge_iflist *bif)
+{
+       if (bif->bif_hold_timer.active) {
+               bif->bif_config_pending = 1;
+               return;
+       }
+
+       bif->bif_config_bpdu.cu_message_type = BSTP_MSGTYPE_CFG;
+       bif->bif_config_bpdu.cu_rootid = sc->sc_designated_root;
+       bif->bif_config_bpdu.cu_root_path_cost = sc->sc_root_path_cost;
+       bif->bif_config_bpdu.cu_bridge_id = sc->sc_bridge_id;
+       bif->bif_config_bpdu.cu_port_id = bif->bif_port_id;
+
+       if (bstp_root_bridge(sc))
+               bif->bif_config_bpdu.cu_message_age = 0;
+       else
+               bif->bif_config_bpdu.cu_message_age =
+                   sc->sc_root_port->bif_message_age_timer.value +
+                   BSTP_MESSAGE_AGE_INCR;
+
+       bif->bif_config_bpdu.cu_max_age = sc->sc_max_age;
+       bif->bif_config_bpdu.cu_hello_time = sc->sc_hello_time;
+       bif->bif_config_bpdu.cu_forward_delay = sc->sc_forward_delay;
+       bif->bif_config_bpdu.cu_topology_change_acknowledgment
+           = bif->bif_topology_change_acknowledge;
+       bif->bif_config_bpdu.cu_topology_change = sc->sc_topology_change;
+
+       if (bif->bif_config_bpdu.cu_message_age < sc->sc_max_age) {
+               bif->bif_topology_change_acknowledge = 0;
+               bif->bif_config_pending = 0;
+               bstp_send_config_bpdu(sc, bif, &bif->bif_config_bpdu);
+               bstp_timer_start(&bif->bif_hold_timer, 0);
+       }
+}
+
+void
+bstp_send_config_bpdu(struct bridge_softc *sc, struct bridge_iflist *bif,
+    struct bstp_config_unit *cu)
+{
+       struct ifnet *ifp;
+       struct mbuf *m;
+       struct ether_header *eh;
+       struct bstp_cbpdu bpdu;
+
+       ifp = bif->bif_ifp;
+
+       if ((ifp->if_flags & IFF_RUNNING) == 0)
+               return;
+
+       MGETHDR(m, M_DONTWAIT, MT_DATA);
+       if (m == NULL)
+               return;
+
+       eh = mtod(m, struct ether_header *);
+
+       m->m_pkthdr.rcvif = ifp;
+       m->m_pkthdr.len = sizeof(*eh) + sizeof(bpdu);
+       m->m_len = m->m_pkthdr.len;
+
+       bpdu.cbu_ssap = bpdu.cbu_dsap = LLC_8021D_LSAP;
+       bpdu.cbu_ctl = LLC_UI;
+       bpdu.cbu_protoid = htons(0);
+       bpdu.cbu_protover = 0;
+       bpdu.cbu_bpdutype = cu->cu_message_type;
+       bpdu.cbu_flags = (cu->cu_topology_change ? BSTP_FLAG_TC : 0) |
+           (cu->cu_topology_change_acknowledgment ? BSTP_FLAG_TCA : 0);
+
+       bpdu.cbu_rootpri = htons(cu->cu_rootid >> 48);
+       bpdu.cbu_rootaddr[0] = cu->cu_rootid >> 40;
+       bpdu.cbu_rootaddr[1] = cu->cu_rootid >> 32;
+       bpdu.cbu_rootaddr[2] = cu->cu_rootid >> 24;
+       bpdu.cbu_rootaddr[3] = cu->cu_rootid >> 16;
+       bpdu.cbu_rootaddr[4] = cu->cu_rootid >> 8;
+       bpdu.cbu_rootaddr[5] = cu->cu_rootid >> 0;
+
+       bpdu.cbu_rootpathcost = htonl(cu->cu_root_path_cost);
+
+       bpdu.cbu_bridgepri = htons(cu->cu_bridge_id >> 48);
+       bpdu.cbu_bridgeaddr[0] = cu->cu_bridge_id >> 40;
+       bpdu.cbu_bridgeaddr[1] = cu->cu_bridge_id >> 32;
+       bpdu.cbu_bridgeaddr[2] = cu->cu_bridge_id >> 24;
+       bpdu.cbu_bridgeaddr[3] = cu->cu_bridge_id >> 16;
+       bpdu.cbu_bridgeaddr[4] = cu->cu_bridge_id >> 8;
+       bpdu.cbu_bridgeaddr[5] = cu->cu_bridge_id >> 0;
+
+       bpdu.cbu_portid = htons(cu->cu_port_id);
+       bpdu.cbu_messageage = htons(cu->cu_message_age);
+       bpdu.cbu_maxage = htons(cu->cu_max_age);
+       bpdu.cbu_hellotime = htons(cu->cu_hello_time);
+       bpdu.cbu_forwarddelay = htons(cu->cu_forward_delay);
+
+       memcpy(eh->ether_shost, ifnet_lladdr(ifp), ETHER_ADDR_LEN);
+       memcpy(eh->ether_dhost, bstp_etheraddr, ETHER_ADDR_LEN);
+       eh->ether_type = htons(sizeof(bpdu));
+
+       memcpy(mtod(m, caddr_t) + sizeof(*eh), &bpdu, sizeof(bpdu));
+
+       bridge_enqueue(sc, ifp, m); // APPLE MODIFICATION - no flags param
+}
+
+int
+bstp_root_bridge(struct bridge_softc *sc)
+{
+       return (sc->sc_designated_root == sc->sc_bridge_id);
+}
+
+int
+bstp_supersedes_port_info(struct bridge_softc *sc, struct bridge_iflist *bif,
+    struct bstp_config_unit *cu)
+{
+       if (cu->cu_rootid < bif->bif_designated_root)
+               return (1);
+       if (cu->cu_rootid > bif->bif_designated_root)
+               return (0);
+
+       if (cu->cu_root_path_cost < bif->bif_designated_cost)
+               return (1);
+       if (cu->cu_root_path_cost > bif->bif_designated_cost)
+               return (0);
+
+       if (cu->cu_bridge_id < bif->bif_designated_bridge)
+               return (1);
+       if (cu->cu_bridge_id > bif->bif_designated_bridge)
+               return (0);
+
+       if (sc->sc_bridge_id != cu->cu_bridge_id)
+               return (1);
+       if (cu->cu_port_id <= bif->bif_designated_port)
+               return (1);
+       return (0);
+}
+
+void
+bstp_record_config_information(__unused struct bridge_softc *sc,
+    struct bridge_iflist *bif, struct bstp_config_unit *cu)
+{
+       bif->bif_designated_root = cu->cu_rootid;
+       bif->bif_designated_cost = cu->cu_root_path_cost;
+       bif->bif_designated_bridge = cu->cu_bridge_id;
+       bif->bif_designated_port = cu->cu_port_id;
+       bstp_timer_start(&bif->bif_message_age_timer, cu->cu_message_age);
+}
+
+void
+bstp_record_config_timeout_values(struct bridge_softc *sc,
+    struct bstp_config_unit *config)
+{
+       sc->sc_max_age = config->cu_max_age;
+       sc->sc_hello_time = config->cu_hello_time;
+       sc->sc_forward_delay = config->cu_forward_delay;
+       sc->sc_topology_change = config->cu_topology_change;
+}
+
+void
+bstp_config_bpdu_generation(struct bridge_softc *sc)
+{
+       struct bridge_iflist *bif;
+
+       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+               if ((bif->bif_flags & IFBIF_STP) == 0)
+                       continue;
+               if (bstp_designated_port(sc, bif) &&
+                   (bif->bif_state != BSTP_IFSTATE_DISABLED))
+                       bstp_transmit_config(sc, bif);
+       }
+}
+
+int
+bstp_designated_port(struct bridge_softc *sc, struct bridge_iflist *bif)
+{
+       return ((bif->bif_designated_bridge == sc->sc_bridge_id)
+           && (bif->bif_designated_port == bif->bif_port_id));
+}
+
+void
+bstp_transmit_tcn(struct bridge_softc *sc)
+{
+       struct bstp_tbpdu bpdu;
+       struct bridge_iflist *bif = sc->sc_root_port;
+       struct ifnet *ifp;
+       struct ether_header *eh;
+       struct mbuf *m;
+
+       KASSERT(bif != NULL, "bstp_transmit_tcn bif NULL");
+       ifp = bif->bif_ifp;
+       if ((ifp->if_flags & IFF_RUNNING) == 0)
+               return;
+
+       MGETHDR(m, M_DONTWAIT, MT_DATA);
+       if (m == NULL)
+               return;
+
+       m->m_pkthdr.rcvif = ifp;
+       m->m_pkthdr.len = sizeof(*eh) + sizeof(bpdu);
+       m->m_len = m->m_pkthdr.len;
+
+       eh = mtod(m, struct ether_header *);
+
+       memcpy(eh->ether_shost, ifnet_lladdr(ifp), ETHER_ADDR_LEN);
+       memcpy(eh->ether_dhost, bstp_etheraddr, ETHER_ADDR_LEN);
+       eh->ether_type = htons(sizeof(bpdu));
+
+       bpdu.tbu_ssap = bpdu.tbu_dsap = LLC_8021D_LSAP;
+       bpdu.tbu_ctl = LLC_UI;
+       bpdu.tbu_protoid = 0;
+       bpdu.tbu_protover = 0;
+       bpdu.tbu_bpdutype = BSTP_MSGTYPE_TCN;
+
+       memcpy(mtod(m, caddr_t) + sizeof(*eh), &bpdu, sizeof(bpdu));
+
+       bridge_enqueue(sc, ifp, m); // APPLE MODIFICATION - no flags param
+}
+
+void
+bstp_configuration_update(struct bridge_softc *sc)
+{
+       bstp_root_selection(sc);
+       bstp_designated_port_selection(sc);
+}
+
+void
+bstp_root_selection(struct bridge_softc *sc)
+{
+       struct bridge_iflist *root_port = NULL, *bif;
+
+       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+               if ((bif->bif_flags & IFBIF_STP) == 0)
+                       continue;
+               if (bstp_designated_port(sc, bif))
+                       continue;
+               if (bif->bif_state == BSTP_IFSTATE_DISABLED)
+                       continue;
+               if (bif->bif_designated_root >= sc->sc_bridge_id)
+                       continue;
+               if (root_port == NULL)
+                       goto set_port;
+
+               if (bif->bif_designated_root < root_port->bif_designated_root)
+                       goto set_port;
+               if (bif->bif_designated_root > root_port->bif_designated_root)
+                       continue;
+
+               if ((bif->bif_designated_cost + bif->bif_path_cost) <
+                   (root_port->bif_designated_cost + root_port->bif_path_cost))
+                       goto set_port;
+               if ((bif->bif_designated_cost + bif->bif_path_cost) >
+                   (root_port->bif_designated_cost + root_port->bif_path_cost))
+                       continue;
+
+               if (bif->bif_designated_bridge <
+                   root_port->bif_designated_bridge)
+                       goto set_port;
+               if (bif->bif_designated_bridge >
+                   root_port->bif_designated_bridge)
+                       continue;
+
+               if (bif->bif_designated_port < root_port->bif_designated_port)
+                       goto set_port;
+               if (bif->bif_designated_port > root_port->bif_designated_port)
+                       continue;
+
+               if (bif->bif_port_id >= root_port->bif_port_id)
+                       continue;
+set_port:
+               root_port = bif;
+       }
+
+       sc->sc_root_port = root_port;
+       if (root_port == NULL) {
+               sc->sc_designated_root = sc->sc_bridge_id;
+               sc->sc_root_path_cost = 0;
+       } else {
+               sc->sc_designated_root = root_port->bif_designated_root;
+               sc->sc_root_path_cost = root_port->bif_designated_cost +
+                   root_port->bif_path_cost;
+       }
+}
+
+void
+bstp_designated_port_selection(struct bridge_softc *sc)
+{
+       struct bridge_iflist *bif;
+
+       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+               if ((bif->bif_flags & IFBIF_STP) == 0)
+                       continue;
+               if (bstp_designated_port(sc, bif))
+                       goto designated;
+               if (bif->bif_designated_root != sc->sc_designated_root)
+                       goto designated;
+
+               if (sc->sc_root_path_cost < bif->bif_designated_cost)
+                       goto designated;
+               if (sc->sc_root_path_cost > bif->bif_designated_cost)
+                       continue;
+
+               if (sc->sc_bridge_id < bif->bif_designated_bridge)
+                       goto designated;
+               if (sc->sc_bridge_id > bif->bif_designated_bridge)
+                       continue;
+
+               if (bif->bif_port_id > bif->bif_designated_port)
+                       continue;
+designated:
+               bstp_become_designated_port(sc, bif);
+       }
+}
+
+void
+bstp_become_designated_port(struct bridge_softc *sc, struct bridge_iflist *bif)
+{
+       bif->bif_designated_root = sc->sc_designated_root;
+       bif->bif_designated_cost = sc->sc_root_path_cost;
+       bif->bif_designated_bridge = sc->sc_bridge_id;
+       bif->bif_designated_port = bif->bif_port_id;
+}
+
+void
+bstp_port_state_selection(struct bridge_softc *sc)
+{
+       struct bridge_iflist *bif;
+
+       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+               if ((bif->bif_flags & IFBIF_STP) == 0)
+                       continue;
+               if (bif == sc->sc_root_port) {
+                       bif->bif_config_pending = 0;
+                       bif->bif_topology_change_acknowledge = 0;
+                       bstp_make_forwarding(sc, bif);
+               } else if (bstp_designated_port(sc, bif)) {
+                       bstp_timer_stop(&bif->bif_message_age_timer);
+                       bstp_make_forwarding(sc, bif);
+               } else {
+                       bif->bif_config_pending = 0;
+                       bif->bif_topology_change_acknowledge = 0;
+                       bstp_make_blocking(sc, bif);
+               }
+       }
+}
+
+void
+bstp_make_forwarding(__unused struct bridge_softc *sc,
+    struct bridge_iflist *bif)
+{
+       if (bif->bif_state == BSTP_IFSTATE_BLOCKING) {
+               bstp_set_port_state(bif, BSTP_IFSTATE_LISTENING);
+               bstp_timer_start(&bif->bif_forward_delay_timer, 0);
+       }
+}
+
+void
+bstp_make_blocking(struct bridge_softc *sc, struct bridge_iflist *bif)
+{
+       if ((bif->bif_state != BSTP_IFSTATE_DISABLED) &&
+           (bif->bif_state != BSTP_IFSTATE_BLOCKING)) {
+               if ((bif->bif_state == BSTP_IFSTATE_FORWARDING) ||
+                   (bif->bif_state == BSTP_IFSTATE_LEARNING)) {
+                       if (bif->bif_change_detection_enabled) {
+                               bstp_topology_change_detection(sc);
+                       }
+               }
+               bstp_set_port_state(bif, BSTP_IFSTATE_BLOCKING);
+               bstp_timer_stop(&bif->bif_forward_delay_timer);
+       }
+}
+
+void
+bstp_set_port_state(struct bridge_iflist *bif, uint8_t state)
+{
+       bif->bif_state = state;
+}
+
+void
+bstp_topology_change_detection(struct bridge_softc *sc)
+{
+       if (bstp_root_bridge(sc)) {
+               sc->sc_topology_change = 1;
+               bstp_timer_start(&sc->sc_topology_change_timer, 0);
+       } else if (!sc->sc_topology_change_detected) {
+               bstp_transmit_tcn(sc);
+               bstp_timer_start(&sc->sc_tcn_timer, 0);
+       }
+       sc->sc_topology_change_detected = 1;
+}
+
+void
+bstp_topology_change_acknowledged(struct bridge_softc *sc)
+{
+       sc->sc_topology_change_detected = 0;
+       bstp_timer_stop(&sc->sc_tcn_timer);
+}
+
+void
+bstp_acknowledge_topology_change(struct bridge_softc *sc,
+    struct bridge_iflist *bif)
+{
+       bif->bif_topology_change_acknowledge = 1;
+       bstp_transmit_config(sc, bif);
+}
+
+__private_extern__ struct mbuf *
+bstp_input(struct bridge_softc *sc, struct ifnet *ifp, struct mbuf *m)
+{
+       struct bridge_iflist *bif = NULL;
+       struct ether_header *eh;
+       struct bstp_tbpdu tpdu;
+       struct bstp_cbpdu cpdu;
+       struct bstp_config_unit cu;
+       struct bstp_tcn_unit tu;
+       uint16_t len;
+
+       eh = mtod(m, struct ether_header *);
+
+       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+               if ((bif->bif_flags & IFBIF_STP) == 0)
+                       continue;
+               if (bif->bif_ifp == ifp)
+                       break;
+       }
+       if (bif == NULL)
+               goto out;
+
+       len = ntohs(eh->ether_type);
+       if (len < sizeof(tpdu))
+               goto out;
+
+       m_adj(m, ETHER_HDR_LEN);
+
+       if (m->m_pkthdr.len > len)
+               m_adj(m, len - m->m_pkthdr.len);
+       if ((size_t)m->m_len < sizeof(tpdu) &&
+           (m = m_pullup(m, sizeof(tpdu))) == NULL)
+               goto out;
+
+       memcpy(&tpdu, mtod(m, caddr_t), sizeof(tpdu));
+
+       if (tpdu.tbu_dsap != LLC_8021D_LSAP ||
+           tpdu.tbu_ssap != LLC_8021D_LSAP ||
+           tpdu.tbu_ctl != LLC_UI)
+               goto out;
+       if (tpdu.tbu_protoid != 0 || tpdu.tbu_protover != 0)
+               goto out;
+
+       switch (tpdu.tbu_bpdutype) {
+       case BSTP_MSGTYPE_TCN:
+               tu.tu_message_type = tpdu.tbu_bpdutype;
+               bstp_received_tcn_bpdu(sc, bif, &tu);
+               break;
+       case BSTP_MSGTYPE_CFG:
+               if ((size_t)m->m_len < sizeof(cpdu) &&
+                   (m = m_pullup(m, sizeof(cpdu))) == NULL)
+                       goto out;
+               memcpy(&cpdu, mtod(m, caddr_t), sizeof(cpdu));
+
+               cu.cu_rootid =
+                   (((uint64_t)ntohs(cpdu.cbu_rootpri)) << 48) |
+                   (((uint64_t)cpdu.cbu_rootaddr[0]) << 40) |
+                   (((uint64_t)cpdu.cbu_rootaddr[1]) << 32) |
+                   (((uint64_t)cpdu.cbu_rootaddr[2]) << 24) |
+                   (((uint64_t)cpdu.cbu_rootaddr[3]) << 16) |
+                   (((uint64_t)cpdu.cbu_rootaddr[4]) << 8) |
+                   (((uint64_t)cpdu.cbu_rootaddr[5]) << 0);
+
+               cu.cu_bridge_id =
+                   (((uint64_t)ntohs(cpdu.cbu_bridgepri)) << 48) |
+                   (((uint64_t)cpdu.cbu_bridgeaddr[0]) << 40) |
+                   (((uint64_t)cpdu.cbu_bridgeaddr[1]) << 32) |
+                   (((uint64_t)cpdu.cbu_bridgeaddr[2]) << 24) |
+                   (((uint64_t)cpdu.cbu_bridgeaddr[3]) << 16) |
+                   (((uint64_t)cpdu.cbu_bridgeaddr[4]) << 8) |
+                   (((uint64_t)cpdu.cbu_bridgeaddr[5]) << 0);
+
+               cu.cu_root_path_cost = ntohl(cpdu.cbu_rootpathcost);
+               cu.cu_message_age = ntohs(cpdu.cbu_messageage);
+               cu.cu_max_age = ntohs(cpdu.cbu_maxage);
+               cu.cu_hello_time = ntohs(cpdu.cbu_hellotime);
+               cu.cu_forward_delay = ntohs(cpdu.cbu_forwarddelay);
+               cu.cu_port_id = ntohs(cpdu.cbu_portid);
+               cu.cu_message_type = cpdu.cbu_bpdutype;
+               cu.cu_topology_change_acknowledgment =
+                   (cpdu.cbu_flags & BSTP_FLAG_TCA) ? 1 : 0;
+               cu.cu_topology_change =
+                   (cpdu.cbu_flags & BSTP_FLAG_TC) ? 1 : 0;
+               bstp_received_config_bpdu(sc, bif, &cu);
+               break;
+       default:
+               goto out;
+       }
+
+ out:
+       if (m)
+               m_freem(m);
+       return (NULL);
+}
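
The 64-bit IDs assembled above pack a 16-bit priority into the top bits and a 48-bit MAC below it, matching the root/bridge id encoding of the BPDU. A hypothetical helper (names are illustrative, not from the file) showing the same packing:

    /* Hypothetical pack helper for the 64-bit bridge id used above:
     * priority in the top 16 bits, MAC in the low 48. */
    #include <stdint.h>
    #include <stdio.h>

    static uint64_t
    bridge_id_pack(uint16_t pri, const uint8_t mac[6])
    {
        uint64_t id = (uint64_t)pri << 48;
        for (int i = 0; i < 6; i++)
            id |= (uint64_t)mac[i] << (8 * (5 - i));
        return id;
    }

    int
    main(void)
    {
        const uint8_t mac[6] = { 0x00, 0x1c, 0x42, 0x00, 0x00, 0x01 };
        uint64_t id = bridge_id_pack(0x8000, mac); /* default priority */
        printf("bridge id = 0x%016llx\n", (unsigned long long)id);
        /* prints 0x8000001c42000001 */
        return 0;
    }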
+
+void
+bstp_received_config_bpdu(struct bridge_softc *sc, struct bridge_iflist *bif,
+    struct bstp_config_unit *cu)
+{
+       int root;
+
+       root = bstp_root_bridge(sc);
+
+       if (bif->bif_state != BSTP_IFSTATE_DISABLED) {
+               if (bstp_supersedes_port_info(sc, bif, cu)) {
+                       bstp_record_config_information(sc, bif, cu);
+                       bstp_configuration_update(sc);
+                       bstp_port_state_selection(sc);
+
+                       if ((bstp_root_bridge(sc) == 0) && root) {
+                               bstp_timer_stop(&sc->sc_hello_timer);
+
+                               if (sc->sc_topology_change_detected) {
+                                       bstp_timer_stop(
+                                           &sc->sc_topology_change_timer);
+                                       bstp_transmit_tcn(sc);
+                                       bstp_timer_start(&sc->sc_tcn_timer, 0);
+                               }
+                       }
+
+                       if (bif == sc->sc_root_port) {
+                               bstp_record_config_timeout_values(sc, cu);
+                               bstp_config_bpdu_generation(sc);
+
+                               if (cu->cu_topology_change_acknowledgment)
+                                       bstp_topology_change_acknowledged(sc);
+                       }
+               } else if (bstp_designated_port(sc, bif))
+                       bstp_transmit_config(sc, bif);
+       }
+}
+
+void
+bstp_received_tcn_bpdu(struct bridge_softc *sc, struct bridge_iflist *bif,
+    __unused struct bstp_tcn_unit *tcn)
+{
+       if (bif->bif_state != BSTP_IFSTATE_DISABLED &&
+           bstp_designated_port(sc, bif)) {
+               bstp_topology_change_detection(sc);
+               bstp_acknowledge_topology_change(sc, bif);
+       }
+}
+
+void
+bstp_hello_timer_expiry(struct bridge_softc *sc)
+{
+       bstp_config_bpdu_generation(sc);
+       bstp_timer_start(&sc->sc_hello_timer, 0);
+}
+
+void
+bstp_message_age_timer_expiry(struct bridge_softc *sc,
+    struct bridge_iflist *bif)
+{
+       int root;
+
+       root = bstp_root_bridge(sc);
+       bstp_become_designated_port(sc, bif);
+       bstp_configuration_update(sc);
+       bstp_port_state_selection(sc);
+
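+       /*
+        * If this bridge has just become the root bridge, adopt its own
+        * timer parameters and begin announcing the topology change.
+        */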
+       if ((bstp_root_bridge(sc)) && (root == 0)) {
+               sc->sc_max_age = sc->sc_bridge_max_age;
+               sc->sc_hello_time = sc->sc_bridge_hello_time;
+               sc->sc_forward_delay = sc->sc_bridge_forward_delay;
+
+               bstp_topology_change_detection(sc);
+               bstp_timer_stop(&sc->sc_tcn_timer);
+               bstp_config_bpdu_generation(sc);
+               bstp_timer_start(&sc->sc_hello_timer, 0);
+       }
+}
+
+void
+bstp_forward_delay_timer_expiry(struct bridge_softc *sc,
+    struct bridge_iflist *bif)
+{
+       if (bif->bif_state == BSTP_IFSTATE_LISTENING) {
+               bstp_set_port_state(bif, BSTP_IFSTATE_LEARNING);
+               bstp_timer_start(&bif->bif_forward_delay_timer, 0);
+       } else if (bif->bif_state == BSTP_IFSTATE_LEARNING) {
+               bstp_set_port_state(bif, BSTP_IFSTATE_FORWARDING);
+               if (bstp_designated_for_some_port(sc) &&
+                   bif->bif_change_detection_enabled)
+                       bstp_topology_change_detection(sc);
+       }
+}
+
+int
+bstp_designated_for_some_port(struct bridge_softc *sc)
+{
+       struct bridge_iflist *bif;
+
+       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+               if ((bif->bif_flags & IFBIF_STP) == 0)
+                       continue;
+               if (bif->bif_designated_bridge == sc->sc_bridge_id)
+                       return (1);
+       }
+       return (0);
+}
+
+void
+bstp_tcn_timer_expiry(struct bridge_softc *sc)
+{
+       bstp_transmit_tcn(sc);
+       bstp_timer_start(&sc->sc_tcn_timer, 0);
+}
+
+void
+bstp_topology_change_timer_expiry(struct bridge_softc *sc)
+{
+       sc->sc_topology_change_detected = 0;
+       sc->sc_topology_change = 0;
+}
+
+void
+bstp_hold_timer_expiry(struct bridge_softc *sc, struct bridge_iflist *bif)
+{
+       if (bif->bif_config_pending)
+               bstp_transmit_config(sc, bif);
+}
+
+__private_extern__ void
+bstp_initialization(struct bridge_softc *sc)
+{
+       struct bridge_iflist *bif, *mif;
+       struct timespec ts;
+       unsigned char *lladdr;
+
+       lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_OWNED);
+
+       mif = NULL;
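+       /*
+        * Elect the Ethernet member with the numerically lowest MAC
+        * address to supply the bridge's own address; mif tracks the
+        * current minimum.
+        */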
+       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+               if ((bif->bif_flags & IFBIF_STP) == 0)
+                       continue;
+               if (bif->bif_ifp->if_type != IFT_ETHER)
+                       continue;
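+               /*
+                * 802.1D port ID: port priority in the high byte,
+                * if_index in the low byte.
+                */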
+               bif->bif_port_id = (bif->bif_priority << 8) |
+                   (bif->bif_ifp->if_index & 0xff);
+
+               if (mif == NULL) {
+                       mif = bif;
+                       continue;
+               }
+               if (memcmp(ifnet_lladdr(bif->bif_ifp),
+                   ifnet_lladdr(mif->bif_ifp), ETHER_ADDR_LEN) < 0) {
+                       mif = bif;
+                       continue;
+               }
+       }
+       if (mif == NULL) {
+               bstp_stop(sc);
+               return;
+       }
+
+       lladdr = ifnet_lladdr(mif->bif_ifp);
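+       /*
+        * Form the 64-bit 802.1D bridge ID: the 16-bit bridge priority
+        * in the two high-order bytes, followed by the 48-bit MAC
+        * address of the chosen member interface.
+        */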
+       sc->sc_bridge_id =
+           (((uint64_t)sc->sc_bridge_priority) << 48) |
+           (((uint64_t)lladdr[0]) << 40) |
+           (((uint64_t)lladdr[1]) << 32) |
+           (((uint64_t)lladdr[2]) << 24) |
+           (((uint64_t)lladdr[3]) << 16) |
+           (((uint64_t)lladdr[4]) << 8) |
+           (((uint64_t)lladdr[5]) << 0);
+
+       sc->sc_designated_root = sc->sc_bridge_id;
+       sc->sc_root_path_cost = 0;
+       sc->sc_root_port = NULL;
+
+       sc->sc_max_age = sc->sc_bridge_max_age;
+       sc->sc_hello_time = sc->sc_bridge_hello_time;
+       sc->sc_forward_delay = sc->sc_bridge_forward_delay;
+       sc->sc_topology_change_detected = 0;
+       sc->sc_topology_change = 0;
+       bstp_timer_stop(&sc->sc_tcn_timer);
+       bstp_timer_stop(&sc->sc_topology_change_timer);
+
+       bsd_untimeout(bstp_tick, sc);
+       ts.tv_sec = 1;
+       ts.tv_nsec = 0;
+       bsd_timeout(bstp_tick, sc, &ts);
+
+       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+               if (bif->bif_flags & IFBIF_STP)
+                       bstp_enable_port(sc, bif);
+               else
+                       bstp_disable_port(sc, bif);
+       }
+
+       bstp_port_state_selection(sc);
+       bstp_config_bpdu_generation(sc);
+       bstp_timer_start(&sc->sc_hello_timer, 0);
+}
+
+__private_extern__ void
+bstp_stop(struct bridge_softc *sc)
+{
+       struct bridge_iflist *bif;
+
+       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+               bstp_set_port_state(bif, BSTP_IFSTATE_DISABLED);
+               bstp_timer_stop(&bif->bif_hold_timer);
+               bstp_timer_stop(&bif->bif_message_age_timer);
+               bstp_timer_stop(&bif->bif_forward_delay_timer);
+       }
+
+       bsd_untimeout(bstp_tick, sc);
+
+       bstp_timer_stop(&sc->sc_topology_change_timer);
+       bstp_timer_stop(&sc->sc_tcn_timer);
+       bstp_timer_stop(&sc->sc_hello_timer);
+}
+
+void
+bstp_initialize_port(struct bridge_softc *sc, struct bridge_iflist *bif)
+{
+       bstp_become_designated_port(sc, bif);
+       bstp_set_port_state(bif, BSTP_IFSTATE_BLOCKING);
+       bif->bif_topology_change_acknowledge = 0;
+       bif->bif_config_pending = 0;
+       bif->bif_change_detection_enabled = 1;
+       bstp_timer_stop(&bif->bif_message_age_timer);
+       bstp_timer_stop(&bif->bif_forward_delay_timer);
+       bstp_timer_stop(&bif->bif_hold_timer);
+}
+
+void
+bstp_enable_port(struct bridge_softc *sc, struct bridge_iflist *bif)
+{
+       bstp_initialize_port(sc, bif);
+       bstp_port_state_selection(sc);
+}
+
+void
+bstp_disable_port(struct bridge_softc *sc, struct bridge_iflist *bif)
+{
+       int root;
+
+       root = bstp_root_bridge(sc);
+       bstp_become_designated_port(sc, bif);
+       bstp_set_port_state(bif, BSTP_IFSTATE_DISABLED);
+       bif->bif_topology_change_acknowledge = 0;
+       bif->bif_config_pending = 0;
+       bstp_timer_stop(&bif->bif_message_age_timer);
+       bstp_timer_stop(&bif->bif_forward_delay_timer);
+       bstp_configuration_update(sc);
+       bstp_port_state_selection(sc);
+
+       if (bstp_root_bridge(sc) && (root == 0)) {
+               sc->sc_max_age = sc->sc_bridge_max_age;
+               sc->sc_hello_time = sc->sc_bridge_hello_time;
+               sc->sc_forward_delay = sc->sc_bridge_forward_delay;
+
+               bstp_topology_change_detection(sc);
+               bstp_timer_stop(&sc->sc_tcn_timer);
+               bstp_config_bpdu_generation(sc);
+               bstp_timer_start(&sc->sc_hello_timer, 0);
+       }
+}
+
+void
+bstp_set_bridge_priority(struct bridge_softc *sc, uint64_t new_bridge_id)
+{
+       struct bridge_iflist *bif;
+       int root;
+
+       root = bstp_root_bridge(sc);
+
+       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+               if ((bif->bif_flags & IFBIF_STP) == 0)
+                       continue;
+               if (bstp_designated_port(sc, bif))
+                       bif->bif_designated_bridge = new_bridge_id;
+       }
+
+       sc->sc_bridge_id = new_bridge_id;
+
+       bstp_configuration_update(sc);
+       bstp_port_state_selection(sc);
+
+       if (bstp_root_bridge(sc) && (root == 0)) {
+               sc->sc_max_age = sc->sc_bridge_max_age;
+               sc->sc_hello_time = sc->sc_bridge_hello_time;
+               sc->sc_forward_delay = sc->sc_bridge_forward_delay;
+
+               bstp_topology_change_detection(sc);
+               bstp_timer_stop(&sc->sc_tcn_timer);
+               bstp_config_bpdu_generation(sc);
+               bstp_timer_start(&sc->sc_hello_timer, 0);
+       }
+}
+
+void
+bstp_set_port_priority(struct bridge_softc *sc, struct bridge_iflist *bif,
+    uint16_t new_port_id)
+{
+       if (bstp_designated_port(sc, bif))
+               bif->bif_designated_port = new_port_id;
+
+       bif->bif_port_id = new_port_id;
+
+       if ((sc->sc_bridge_id == bif->bif_designated_bridge) &&
+           (bif->bif_port_id < bif->bif_designated_port)) {
+               bstp_become_designated_port(sc, bif);
+               bstp_port_state_selection(sc);
+       }
+}
+
+void
+bstp_set_path_cost(struct bridge_softc *sc, struct bridge_iflist *bif,
+    uint32_t path_cost)
+{
+       bif->bif_path_cost = path_cost;
+       bstp_configuration_update(sc);
+       bstp_port_state_selection(sc);
+}
+
+void
+bstp_enable_change_detection(struct bridge_iflist *bif)
+{
+       bif->bif_change_detection_enabled = 1;
+}
+
+void
+bstp_disable_change_detection(struct bridge_iflist *bif)
+{
+       bif->bif_change_detection_enabled = 0;
+}
+
+void
+bstp_ifupdstatus(struct bridge_softc *sc, struct bridge_iflist *bif)
+{
+       struct ifnet *ifp = bif->bif_ifp;
+       struct ifmediareq ifmr;
+
+       if ((ifnet_flags(ifp) & IFF_UP)) {
+               bzero(&ifmr, sizeof(ifmr));
+               if (ifnet_ioctl(ifp, 0, SIOCGIFMEDIA, &ifmr) == 0) {
+                       // enable the port when the link is up, or its state is unknown
+                       if ((ifmr.ifm_status & IFM_ACTIVE) || !(ifmr.ifm_status & IFM_AVALID)) {
+                               if (bif->bif_state == BSTP_IFSTATE_DISABLED)
+                                       bstp_enable_port(sc, bif);
+                       } else {
+                               if (bif->bif_state != BSTP_IFSTATE_DISABLED)
+                                       bstp_disable_port(sc, bif);
+                       }
+               }
+               return;
+       }
+
+       if (bif->bif_state != BSTP_IFSTATE_DISABLED)
+               bstp_disable_port(sc, bif);
+}
+
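+/*
+ * bstp_tick:
+ *
+ *     One-second periodic worker for the spanning tree: refreshes each
+ *     member's link status, then advances all of the STP timers.
+ */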
+void
+bstp_tick(void *arg)
+{
+       struct bridge_softc *sc = arg;
+       struct bridge_iflist *bif;
+       struct timespec ts;
+
+       lck_mtx_lock(sc->sc_mtx);
+
+       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+               if ((bif->bif_flags & IFBIF_STP) == 0)
+                       continue;
+               /*
+                * XXX This can cause a lag between "link goes away"
+                * XXX and "spanning tree gets updated".  We need
+                * XXX some sort of callback from the link state
+                * XXX update code to kick spanning tree.
+                * XXX --thorpej@NetBSD.org
+                */
+               bstp_ifupdstatus(sc, bif);
+       }
+
+       if (bstp_timer_expired(&sc->sc_hello_timer, sc->sc_hello_time))
+               bstp_hello_timer_expiry(sc);
+
+       if (bstp_timer_expired(&sc->sc_tcn_timer, sc->sc_bridge_hello_time))
+               bstp_tcn_timer_expiry(sc);
+
+       if (bstp_timer_expired(&sc->sc_topology_change_timer,
+           sc->sc_topology_change_time))
+               bstp_topology_change_timer_expiry(sc);
+
+       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+               if ((bif->bif_flags & IFBIF_STP) == 0)
+                       continue;
+               if (bstp_timer_expired(&bif->bif_message_age_timer,
+                   sc->sc_max_age))
+                       bstp_message_age_timer_expiry(sc, bif);
+       }
+
+       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+               if ((bif->bif_flags & IFBIF_STP) == 0)
+                       continue;
+               if (bstp_timer_expired(&bif->bif_forward_delay_timer,
+                   sc->sc_forward_delay))
+                       bstp_forward_delay_timer_expiry(sc, bif);
+
+               if (bstp_timer_expired(&bif->bif_hold_timer,
+                   sc->sc_hold_time))
+                       bstp_hold_timer_expiry(sc, bif);
+       }
+
+       lck_mtx_unlock(sc->sc_mtx);
+
+       /* APPLE MODIFICATION - bridge changes */
+       if (ifnet_flags(sc->sc_if) & IFF_RUNNING) {
+               ts.tv_sec = 1;
+               ts.tv_nsec = 0;
+               bsd_timeout(bstp_tick, sc, &ts);
+       }
+}
+
+void
+bstp_timer_start(struct bridge_timer *t, uint16_t v)
+{
+       t->value = v;
+       t->active = 1;
+}
+
+void
+bstp_timer_stop(struct bridge_timer *t)
+{
+       t->value = 0;
+       t->active = 0;
+}
+
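+/*
+ * bstp_timer_expired:
+ *
+ *     Advance an active timer by one tick (BSTP_TICK_VAL, in the same
+ *     1/256-second units as the threshold v) and report whether it has
+ *     expired; an expired timer is stopped as a side effect.
+ */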
+int
+bstp_timer_expired(struct bridge_timer *t, uint16_t v)
+{
+       if (t->active == 0)
+               return (0);
+       t->value += BSTP_TICK_VAL;
+       if (t->value >= v) {
+               bstp_timer_stop(t);
+               return (1);
+       }
+       return (0);
+}
index e82208b12ef23f845d1d71eaddf2ee394d18f18f..254d94b779984ea7990d374d845c3ee1c595a49a 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 1999-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -1003,6 +1003,14 @@ dlil_interface_filters_input(struct ifnet * ifp, struct mbuf * * m_p,
                        }
                }
        }
+
+       /*
+        * Strip away M_PROTO1 bit prior to sending packet up the stack as 
+        * it is meant to be local to a subsystem -- if_bridge for M_PROTO1
+        */
+       if (*m_p != NULL)
+               (*m_p)->m_flags &= ~M_PROTO1;
+
        return (0);
 }
 
@@ -1350,28 +1358,6 @@ preout_again:
                        }
                }
        
-#if BRIDGE
-               /* !!!LOCKING!!!
-                *
-                * Need to consider how to handle this.
-                * Also note that return should be a goto cleanup
-                */
-               broken-locking
-               if (do_bridge) {
-                       struct mbuf *m0 = m;
-                       struct ether_header *eh = mtod(m, struct ether_header *);
-                       
-                       if (m->m_pkthdr.rcvif)
-                               m->m_pkthdr.rcvif = NULL;
-                       ifp = bridge_dst_lookup(eh);
-                       bdg_forward(&m0, ifp);
-                       if (m0)
-                               m_freem(m0);
-                       
-                       return 0 - should be goto cleanup?
-               }
-#endif
-
                /* 
                 * Let interface filters (if any) do their thing ...
                 */
@@ -1389,6 +1375,11 @@ preout_again:
                                }
                        }
                }
+               /*
+                * Strip away M_PROTO1 bit prior to sending packet to the driver 
+                * as this field may be used by the driver
+                */
+               m->m_flags &= ~M_PROTO1;
                
                /*
                 * Finally, call the driver.
@@ -1559,28 +1550,6 @@ preout_again:
                                m->m_pkthdr.rcvif = NULL;
                }
        
-#if BRIDGE
-               /* !!!LOCKING!!!
-                *
-                * Need to consider how to handle this.
-                * Also note that return should be a goto cleanup
-                */
-               broken-locking
-               if (do_bridge) {
-                       struct mbuf *m0 = m;
-                       struct ether_header *eh = mtod(m, struct ether_header *);
-                       
-                       if (m->m_pkthdr.rcvif)
-                               m->m_pkthdr.rcvif = NULL;
-                       ifp = bridge_dst_lookup(eh);
-                       bdg_forward(&m0, ifp);
-                       if (m0)
-                               m_freem(m0);
-                       
-                       return 0 - should be goto cleanup?
-               }
-#endif
-
                /* 
                 * Let interface filters (if any) do their thing ...
                 */
@@ -1599,6 +1568,12 @@ preout_again:
                        }
                }
 
+               /*
+                * Strip away M_PROTO1 bit prior to sending packet to the driver
+                * as this field may be used by the driver
+                */
+               m->m_flags &= ~M_PROTO1;
+
                /*
                 * If the underlying interface is not capable of handling a
                 * packet whose data portion spans across physically disjoint
index 9ae109b852ae96aaf8d05456215d7da07e6cd0fa..1adcbe27e9309569ea904d3057f70e1324b5d9d5 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000,2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -88,10 +88,6 @@ extern struct ifqueue atalkintrq;
 #endif
 
 
-#if BRIDGE
-#include <net/bridge.h>
-#endif
-
 /* #include "vlan.h" */
 #if NVLAN > 0
 #include <net/if_vlan_var.h>
index 42e0a67a7f24f091e7865d1aa168623fa2eeb35c..a6ec5b2c53f02b8084347086fb39dcf7f170e340 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -98,6 +98,9 @@
 #include <sys/socketvar.h>
 #include <net/if_vlan_var.h>
 #include <net/if_bond_var.h>
+#if IF_BRIDGE
+#include <net/if_bridgevar.h>
+#endif
 
 #include <net/dlil.h>
 
@@ -113,10 +116,6 @@ extern struct ifqueue atalkintrq;
 #endif
 
 
-#if BRIDGE
-#include <net/bridge.h>
-#endif
-
 #define memcpy(x,y,z)  bcopy(y, x, z)
 
 
@@ -636,6 +635,9 @@ __private_extern__ int ether_family_init(void)
 #if BOND
        bond_family_init();
 #endif /* BOND */
+#if IF_BRIDGE
+       bridgeattach(0);
+#endif
 
  done:
 
index 52fd3922958f18f5ddc3f67218748247e3e76bf7..371cccfd6c0c9718140468bb8daf7ff946f64a0f 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 extern struct ifqueue pkintrq;
 #endif
 
-
-#if BRIDGE
-#include <net/bridge.h>
-#endif
-
 /* #include "vlan.h" */
 #if NVLAN > 0
 #include <net/if_vlan_var.h>
index 177631c4b5a4333369663f890c922a8da1279964..422866e737cc91fd89e45cd04af7f59b1912590d 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 
 #include <net/dlil.h>
 
-#if BRIDGE
-#include <net/bridge.h>
-#endif
-
 /* #include "vlan.h" */
 #if NVLAN > 0
 #include <net/if_vlan_var.h>
index efbf23c0f126f09293c64a5c21e0956bced174af..00b7fa5fb8358c6a6c56c7bae62c05cbc68f2668 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000,2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -95,6 +95,8 @@ struct        ether_addr {
 #define ETHERTYPE_REVARP       0x8035  /* reverse Addr. resolution protocol */
 #define        ETHERTYPE_VLAN          0x8100  /* IEEE 802.1Q VLAN tagging */
 #define ETHERTYPE_IPV6         0x86dd  /* IPv6 */
+#define ETHERTYPE_PAE          0x888e  /* EAPOL PAE/802.1x */
+#define ETHERTYPE_RSN_PREAUTH  0x88c7  /* 802.11i / RSN Pre-Authentication */
 #define        ETHERTYPE_LOOPBACK      0x9000  /* used to test interfaces */
 /* XXX - add more useful types here */
 
@@ -119,6 +121,9 @@ struct      ether_addr *ether_aton(const char *);
 #ifdef BSD_KERNEL_PRIVATE
 extern u_char  etherbroadcastaddr[ETHER_ADDR_LEN];
 #endif
+
+#define ETHER_IS_MULTICAST(addr) (*(addr) & 0x01) /* is address mcast/bcast? */
+
 #endif /* KERNEL_PRIVATE */
 
 #ifndef KERNEL
index 20f3600376a89c98b9b4970864ef9769f98b1a40..229eb134f564601af1f22e6c1e4e211cbcef3b04 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #endif
 
 #ifdef KERNEL_PRIVATE
+#define         IF_MAXUNIT      0x7fff  /* historical value */
+
 struct if_clonereq {
        int     ifcr_total;             /* total cloners (out) */
        int     ifcr_count;             /* room for this many in user buffer */
@@ -406,6 +408,34 @@ struct ifmediareq32 {
 #pragma pack()
 #endif /* KERNEL_PRIVATE */
 
+
+#pragma pack(4)
+struct  ifdrv {
+       char            ifd_name[IFNAMSIZ];     /* if name, e.g. "en0" */
+       unsigned long   ifd_cmd;
+       size_t          ifd_len;
+       void            *ifd_data;
+};
+#pragma pack()
+
+#ifdef KERNEL_PRIVATE
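+/*
+ * Fixed-layout 32-bit and 64-bit views of struct ifdrv, used by the
+ * kernel to copy SIOC[SG]DRVSPEC arguments in from user processes of
+ * either address width.
+ */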
+#pragma pack(4)
+struct ifdrv32 {
+       char            ifd_name[IFNAMSIZ];     /* if name, e.g. "en0" */
+       u_int32_t       ifd_cmd;
+       u_int32_t       ifd_len;
+       user32_addr_t   ifd_data;
+};
+
+struct  ifdrv64 {
+       char            ifd_name[IFNAMSIZ];     /* if name, e.g. "en0" */
+       u_int64_t       ifd_cmd;
+       u_int64_t       ifd_len;
+       user64_addr_t   ifd_data;
+};
+#pragma pack()
+#endif /* KERNEL_PRIVATE */
+
 /* 
  * Structure used to retrieve aux status data from interfaces.
  * Kernel suppliers to this interface should respect the formatting
diff --git a/bsd/net/if_bridge.c b/bsd/net/if_bridge.c
new file mode 100644 (file)
index 0000000..acce8fa
--- /dev/null
@@ -0,0 +1,3847 @@
+/*
+ * Copyright (c) 2004-2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*     $apfw: Revision 1.19  2008/10/24 02:34:06  cbzimmer Exp $       */
+/*     $NetBSD: if_bridge.c,v 1.46 2006/11/23 04:07:07 rpaulo Exp $    */
+
+/*
+ * Copyright 2001 Wasabi Systems, Inc.
+ * All rights reserved.
+ *
+ * Written by Jason R. Thorpe for Wasabi Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *     must display the following acknowledgement:
+ *     This product includes software developed for the NetBSD Project by
+ *     Wasabi Systems, Inc.
+ * 4. The name of Wasabi Systems, Inc. may not be used to endorse
+ *     or promote products derived from this software without specific prior
+ *     written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *     must display the following acknowledgement:
+ *     This product includes software developed by Jason L. Wright
+ * 4. The name of the author may not be used to endorse or promote products
+ *     derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp
+ */
+
+/*
+ * Network interface bridge support.
+ *
+ * TODO:
+ *
+ *     - Currently only supports Ethernet-like interfaces (Ethernet,
+ *       802.11, VLANs on Ethernet, etc.)  Figure out a nice way
+ *       to bridge other types of interfaces (FDDI-FDDI, and maybe
+ *       consider heterogenous bridges).
+ */
+
+#include <sys/cdefs.h>
+//_KERNEL_RCSID(0, "$NetBSD: if_bridge.c,v 1.46 2006/11/23 04:07:07 rpaulo Exp $");
+
+//#include "opt_bridge_ipf.h"
+//#include "opt_inet.h"
+//#include "opt_pfil_hooks.h"
+//#include "opt_wlan.h"        /* APPLE MODIFICATION <cbz@apple.com> - Proxy STA support */
+//#include "bpfilter.h"
+//#include "gif.h" // APPLE MODIFICATION - add gif support
+
+#define BRIDGE_DEBUG 0
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/mbuf.h>
+#include <sys/queue.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+//#include <sys/pool.h>
+#include <sys/kauth.h>
+#include <sys/random.h>
+#include <sys/kern_event.h>
+#include <sys/sysctl.h>
+
+#include <libkern/libkern.h>
+
+#include <kern/zalloc.h>
+
+#if NBPFILTER > 0
+#include <net/bpf.h>
+#endif
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/if_types.h>
+#include <net/if_llc.h>
+
+#include <net/if_ether.h>
+#include <net/if_bridgevar.h>
+#include <net/dlil.h>
+
+#include <net/kpi_interfacefilter.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#include <netinet6/in6_var.h>
+#include <netinet6/ip6_var.h>
+#endif
+
+#if BRIDGE_DEBUG
+#define static __private_extern__
+#endif
+
+extern void dlil_input_packet_list(struct ifnet *, struct mbuf *);
+
+/*
+ * Size of the route hash table.  Must be a power of two.
+ */
+/* APPLE MODIFICATION - per Wasabi performance improvement, change the hash table size */
+#if 0
+#ifndef BRIDGE_RTHASH_SIZE
+#define        BRIDGE_RTHASH_SIZE              1024
+#endif
+#else
+#ifndef BRIDGE_RTHASH_SIZE
+#define        BRIDGE_RTHASH_SIZE              256
+#endif
+#endif
+
+/* APPLE MODIFICATION - support for HW checksums */
+#if APPLE_BRIDGE_HWCKSUM_SUPPORT
+#include <netinet/udp.h>
+#include <netinet/tcp.h>
+#endif
+
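+/*
+ * Because BRIDGE_RTHASH_SIZE is a power of two, masking with
+ * (size - 1) reduces a hash value modulo the table size without a
+ * division.
+ */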
+#define        BRIDGE_RTHASH_MASK              (BRIDGE_RTHASH_SIZE - 1)
+
+//#include "carp.h"
+#if NCARP > 0
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+#include <netinet/ip_carp.h>
+#endif
+
+/*
+ * Maximum number of addresses to cache.
+ */
+#ifndef BRIDGE_RTABLE_MAX
+#define        BRIDGE_RTABLE_MAX               100
+#endif
+
+/* APPLE MODIFICATION <cbz@apple.com> - add support for Proxy STA */
+#if IEEE80211_PROXYSTA
+/*
+ * Maximum (additional to maxcache) number of proxysta addresses to cache.
+ */
+#ifndef BRIDGE_RTABLE_MAX_PROXYSTA
+#define        BRIDGE_RTABLE_MAX_PROXYSTA              16
+#endif
+#endif
+
+/*
+ * Spanning tree defaults.
+ */
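+/* Time values are kept in 802.1D's on-the-wire units of 1/256 second. */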
+#define        BSTP_DEFAULT_MAX_AGE            (20 * 256)
+#define        BSTP_DEFAULT_HELLO_TIME         (2 * 256)
+#define        BSTP_DEFAULT_FORWARD_DELAY      (15 * 256)
+#define        BSTP_DEFAULT_HOLD_TIME          (1 * 256)
+#define        BSTP_DEFAULT_BRIDGE_PRIORITY    0x8000
+#define        BSTP_DEFAULT_PORT_PRIORITY      0x80
+#define        BSTP_DEFAULT_PATH_COST          55
+
+/*
+ * Timeout (in seconds) for entries learned dynamically.
+ */
+#ifndef BRIDGE_RTABLE_TIMEOUT
+#define        BRIDGE_RTABLE_TIMEOUT           (20 * 60)       /* same as ARP */
+#endif
+
+/*
+ * Number of seconds between walks of the route list.
+ */
+#ifndef BRIDGE_RTABLE_PRUNE_PERIOD
+#define        BRIDGE_RTABLE_PRUNE_PERIOD      (5 * 60)
+#endif
+
+/*
+ * List of capabilities to mask on the member interface.
+ */
+#define        BRIDGE_IFCAPS_MASK      \
+       (IFCAP_CSUM_IPv4_Tx |   \
+       IFCAP_CSUM_TCPv4_Tx |   \
+       IFCAP_CSUM_UDPv4_Tx |   \
+       IFCAP_CSUM_TCPv6_Tx |   \
+       IFCAP_CSUM_UDPv6_Tx)
+
+
+int    bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
+
+static zone_t bridge_rtnode_pool = NULL;
+
+static errno_t 
+bridge_iff_input(void* cookie, ifnet_t ifp, __unused protocol_family_t protocol,
+                 mbuf_t *data, char **frame_ptr);
+static void 
+bridge_iff_event(void* cookie, ifnet_t ifp, __unused protocol_family_t protocol,
+                 const struct kev_msg *event_msg);
+static void 
+bridge_iff_detached(void* cookie, __unused ifnet_t interface);
+
+static uint32_t
+bridge_rthash(__unused struct bridge_softc *sc, const uint8_t *addr);
+
+static int     bridge_clone_create(struct if_clone *, int);
+static void    bridge_clone_destroy(struct ifnet *);
+
+static errno_t bridge_ioctl(ifnet_t ifp, unsigned long cmd, void *data);
+#if HAS_IF_CAP
+static void    bridge_mutecaps(struct bridge_iflist *, int);
+#endif
+static int     bridge_init(struct ifnet *);
+static void    bridge_stop(struct ifnet *, int);
+
+#if BRIDGE_MEMBER_OUT_FILTER
+static errno_t
+bridge_iff_output(void *cookie, ifnet_t ifp, protocol_family_t protocol, mbuf_t *data);
+static int bridge_output(struct bridge_softc *sc, ifnet_t ifp, mbuf_t m);
+#endif /* BRIDGE_MEMBER_OUT_FILTER */
+
+static errno_t bridge_start(struct ifnet *, mbuf_t);
+static errno_t bridge_set_bpf_tap(ifnet_t ifn, bpf_tap_mode mode, bpf_packet_func bpf_callback);
+__private_extern__ errno_t bridge_bpf_input(ifnet_t ifp, struct mbuf *m);
+__private_extern__ errno_t bridge_bpf_output(ifnet_t ifp, struct mbuf *m);
+
+static void bridge_detach(ifnet_t ifp);
+
+static errno_t bridge_input(struct bridge_iflist *, struct ifnet *, struct mbuf *, void *frame_header);
+
+static void    bridge_forward(struct bridge_softc *, struct mbuf *m);
+
+static void    bridge_timer(void *);
+
+static void    bridge_broadcast(struct bridge_softc *, struct ifnet *,
+                             struct mbuf *, int);
+
+static int     bridge_rtupdate(struct bridge_softc *, const uint8_t *,
+                            struct ifnet *, int, uint8_t);
+static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *);
+static void    bridge_rttrim(struct bridge_softc *);
+static void    bridge_rtage(struct bridge_softc *);
+static void    bridge_rtflush(struct bridge_softc *, int);
+/* APPLE MODIFICATION <cbz@apple.com> - add support for Proxy STA */
+#if IEEE80211_PROXYSTA
+static void    bridge_rtdiscovery(struct bridge_softc *);
+static void    bridge_rtpurge(struct bridge_softc *, struct ifnet *);
+#endif
+static int     bridge_rtdaddr(struct bridge_softc *, const uint8_t *);
+
+static int     bridge_rtable_init(struct bridge_softc *);
+static void    bridge_rtable_fini(struct bridge_softc *);
+
+static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
+                                                  const uint8_t *);
+static int     bridge_rtnode_insert(struct bridge_softc *,
+                                 struct bridge_rtnode *);
+static void    bridge_rtnode_destroy(struct bridge_softc *,
+                                  struct bridge_rtnode *);
+
+static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
+                                                  const char *name);
+static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
+                                                     struct ifnet *ifp);
+static void    bridge_delete_member(struct bridge_softc *,
+                                 struct bridge_iflist *);
+
+static void    bridge_ifdetach(struct bridge_iflist *bif, struct ifnet *ifp);
+
+
+static int     bridge_ioctl_add(struct bridge_softc *, void *);
+static int     bridge_ioctl_del(struct bridge_softc *, void *);
+/* APPLE MODIFICATION <cbz@apple.com> - add support for Proxy STA */
+#if IEEE80211_PROXYSTA
+static int bridge_ioctl_purge(struct bridge_softc *sc, void *arg);
+#endif
+static int     bridge_ioctl_gifflags(struct bridge_softc *, void *);
+static int     bridge_ioctl_sifflags(struct bridge_softc *, void *);
+static int     bridge_ioctl_scache(struct bridge_softc *, void *);
+static int     bridge_ioctl_gcache(struct bridge_softc *, void *);
+static int     bridge_ioctl_gifs32(struct bridge_softc *, void *);
+static int     bridge_ioctl_gifs64(struct bridge_softc *, void *);
+static int     bridge_ioctl_rts32(struct bridge_softc *, void *);
+static int     bridge_ioctl_rts64(struct bridge_softc *, void *);
+static int     bridge_ioctl_saddr32(struct bridge_softc *, void *);
+static int     bridge_ioctl_saddr64(struct bridge_softc *, void *);
+static int     bridge_ioctl_sto(struct bridge_softc *, void *);
+static int     bridge_ioctl_gto(struct bridge_softc *, void *);
+static int     bridge_ioctl_daddr32(struct bridge_softc *, void *);
+static int     bridge_ioctl_daddr64(struct bridge_softc *, void *);
+static int     bridge_ioctl_flush(struct bridge_softc *, void *);
+static int     bridge_ioctl_gpri(struct bridge_softc *, void *);
+static int     bridge_ioctl_spri(struct bridge_softc *, void *);
+static int     bridge_ioctl_ght(struct bridge_softc *, void *);
+static int     bridge_ioctl_sht(struct bridge_softc *, void *);
+static int     bridge_ioctl_gfd(struct bridge_softc *, void *);
+static int     bridge_ioctl_sfd(struct bridge_softc *, void *);
+static int     bridge_ioctl_gma(struct bridge_softc *, void *);
+static int     bridge_ioctl_sma(struct bridge_softc *, void *);
+static int     bridge_ioctl_sifprio(struct bridge_softc *, void *);
+static int     bridge_ioctl_sifcost(struct bridge_softc *, void *);
+
+struct bridge_control {
+       int                             (*bc_func)(struct bridge_softc *, void *);
+       unsigned int    bc_argsize;
+       unsigned int    bc_flags;
+};
+
+#define        BC_F_COPYIN             0x01    /* copy arguments in */
+#define        BC_F_COPYOUT            0x02    /* copy arguments out */
+#define        BC_F_SUSER              0x04    /* do super-user check */
+
+static const struct bridge_control bridge_control_table32[] = {
+       { bridge_ioctl_add,             sizeof(struct ifbreq),
+               BC_F_COPYIN|BC_F_SUSER },
+       { bridge_ioctl_del,             sizeof(struct ifbreq),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_gifflags,        sizeof(struct ifbreq),
+               BC_F_COPYIN|BC_F_COPYOUT },
+       { bridge_ioctl_sifflags,        sizeof(struct ifbreq),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_scache,          sizeof(struct ifbrparam),
+               BC_F_COPYIN|BC_F_SUSER },
+       { bridge_ioctl_gcache,          sizeof(struct ifbrparam),
+               BC_F_COPYOUT },
+       
+       { bridge_ioctl_gifs32,          sizeof(struct ifbifconf32),
+               BC_F_COPYIN|BC_F_COPYOUT },
+       { bridge_ioctl_rts32,           sizeof(struct ifbaconf32),
+               BC_F_COPYIN|BC_F_COPYOUT },
+       
+       { bridge_ioctl_saddr32,         sizeof(struct ifbareq32),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_sto,             sizeof(struct ifbrparam),
+               BC_F_COPYIN|BC_F_SUSER },
+       { bridge_ioctl_gto,             sizeof(struct ifbrparam),
+               BC_F_COPYOUT },
+       
+       { bridge_ioctl_daddr32,         sizeof(struct ifbareq32),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_flush,           sizeof(struct ifbreq),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_gpri,            sizeof(struct ifbrparam),
+               BC_F_COPYOUT },
+       { bridge_ioctl_spri,            sizeof(struct ifbrparam),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_ght,             sizeof(struct ifbrparam),
+               BC_F_COPYOUT },
+       { bridge_ioctl_sht,             sizeof(struct ifbrparam),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_gfd,             sizeof(struct ifbrparam),
+               BC_F_COPYOUT },
+       { bridge_ioctl_sfd,             sizeof(struct ifbrparam),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_gma,             sizeof(struct ifbrparam),
+               BC_F_COPYOUT },
+       { bridge_ioctl_sma,             sizeof(struct ifbrparam),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_sifprio,         sizeof(struct ifbreq),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_sifcost,         sizeof(struct ifbreq),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       /* APPLE MODIFICATION <cbz@apple.com> - add support for Proxy STA */
+#if IEEE80211_PROXYSTA
+       { bridge_ioctl_purge,   sizeof(struct ifbreq),
+               BC_F_COPYIN|BC_F_SUSER },
+#endif
+};
+
+static const struct bridge_control bridge_control_table64[] = {
+       { bridge_ioctl_add,             sizeof(struct ifbreq),
+               BC_F_COPYIN|BC_F_SUSER },
+       { bridge_ioctl_del,             sizeof(struct ifbreq),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_gifflags,        sizeof(struct ifbreq),
+               BC_F_COPYIN|BC_F_COPYOUT },
+       { bridge_ioctl_sifflags,        sizeof(struct ifbreq),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_scache,          sizeof(struct ifbrparam),
+               BC_F_COPYIN|BC_F_SUSER },
+       { bridge_ioctl_gcache,          sizeof(struct ifbrparam),
+               BC_F_COPYOUT },
+       
+       { bridge_ioctl_gifs64,          sizeof(struct ifbifconf64),
+               BC_F_COPYIN|BC_F_COPYOUT },
+       { bridge_ioctl_rts64,           sizeof(struct ifbaconf64),
+               BC_F_COPYIN|BC_F_COPYOUT },
+       
+       { bridge_ioctl_saddr64,         sizeof(struct ifbareq64),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_sto,             sizeof(struct ifbrparam),
+               BC_F_COPYIN|BC_F_SUSER },
+       { bridge_ioctl_gto,             sizeof(struct ifbrparam),
+               BC_F_COPYOUT },
+       
+       { bridge_ioctl_daddr64,         sizeof(struct ifbareq64),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_flush,           sizeof(struct ifbreq),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_gpri,            sizeof(struct ifbrparam),
+               BC_F_COPYOUT },
+       { bridge_ioctl_spri,            sizeof(struct ifbrparam),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_ght,             sizeof(struct ifbrparam),
+               BC_F_COPYOUT },
+       { bridge_ioctl_sht,             sizeof(struct ifbrparam),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_gfd,             sizeof(struct ifbrparam),
+               BC_F_COPYOUT },
+       { bridge_ioctl_sfd,             sizeof(struct ifbrparam),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_gma,             sizeof(struct ifbrparam),
+               BC_F_COPYOUT },
+       { bridge_ioctl_sma,             sizeof(struct ifbrparam),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_sifprio,         sizeof(struct ifbreq),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_sifcost,         sizeof(struct ifbreq),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       /* APPLE MODIFICATION <cbz@apple.com> - add support for Proxy STA */
+#if IEEE80211_PROXYSTA
+       { bridge_ioctl_purge,   sizeof(struct ifbreq),
+               BC_F_COPYIN|BC_F_SUSER },
+#endif
+};
+
+static const unsigned int bridge_control_table_size =
+sizeof(bridge_control_table32) / sizeof(bridge_control_table32[0]);
+
+static LIST_HEAD(, bridge_softc) bridge_list = LIST_HEAD_INITIALIZER(bridge_list);
+
+static lck_grp_t *bridge_lock_grp = NULL;
+static lck_attr_t *bridge_lock_attr = NULL;
+
+static lck_rw_t *bridge_list_lock = NULL;
+
+
+static struct if_clone bridge_cloner = 
+       IF_CLONE_INITIALIZER("bridge", 
+                                                bridge_clone_create, 
+                                                bridge_clone_destroy, 
+                                                0, 
+                                                IF_MAXUNIT);
+
+#if BRIDGE_DEBUG
+
+SYSCTL_DECL(_net_link);
+
+SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "Bridge");
+
+__private_extern__ int _if_brige_debug = 0;
+
+SYSCTL_INT(_net_link_bridge, OID_AUTO, debug, CTLFLAG_RW,
+           &_if_brige_debug, 0, "Bridge debug");
+
+static void printf_ether_header(struct ether_header *eh);
+static void printf_mbuf_data(mbuf_t m, size_t offset, size_t len);
+static void printf_mbuf_pkthdr(mbuf_t m, const char *prefix, const char *suffix);
+static void printf_mbuf(mbuf_t m, const char *prefix, const char *suffix);
+static void link_print(struct sockaddr_dl * dl_p);
+
+void
+printf_mbuf_pkthdr(mbuf_t m, const char *prefix, const char *suffix)
+{
+       if (m)
+               printf("%spktlen: %u rcvif: %p header: %p nextpkt: %p%s",
+                          prefix ? prefix : "",
+                          (unsigned int)mbuf_pkthdr_len(m), mbuf_pkthdr_rcvif(m), mbuf_pkthdr_header(m), mbuf_nextpkt(m),
+                          suffix ? suffix : "");
+       else
+               printf("%s<NULL>%s\n", prefix, suffix);
+}
+
+void
+printf_mbuf(mbuf_t m, const char *prefix, const char *suffix)
+{
+       if (m) {
+               printf("%s%p type: %u flags: 0x%x len: %u data: %p maxlen: %u datastart: %p next: %p%s",
+                          prefix ? prefix : "",
+                          m, mbuf_type(m), mbuf_flags(m), (unsigned int)mbuf_len(m), mbuf_data(m), 
+                          (unsigned int)mbuf_maxlen(m), mbuf_datastart(m), mbuf_next(m), 
+                          !suffix || (mbuf_flags(m) & MBUF_PKTHDR) ? "" : suffix);
+               if ((mbuf_flags(m) & MBUF_PKTHDR))
+                       printf_mbuf_pkthdr(m, " ", suffix);
+       } else
+               printf("%s<NULL>%s\n", prefix, suffix);
+}
+
+void
+printf_mbuf_data(mbuf_t m, size_t offset, size_t len)
+{
+       mbuf_t                  n;
+       size_t                  i, j;
+       size_t                  pktlen, mlen, maxlen;
+       unsigned char   *ptr;
+       
+       pktlen = mbuf_pkthdr_len(m);
+       
+       if (offset > pktlen)
+               return;
+       
+       maxlen = (pktlen - offset > len) ? (offset + len) : pktlen;
+       n = m;
+       mlen = mbuf_len(n);
+       ptr = mbuf_data(n);
+       for (i = 0, j = 0; i < maxlen; i++, j++) {
+               if (j >= mlen) {
+                       n = mbuf_next(n);
+                       if (n == 0)
+                               break;
+                       ptr = mbuf_data(n);
+                       mlen = mbuf_len(n);
+                       j = 0;
+               }
+               if (i >= offset) {
+                       printf("%02x%s", ptr[j], i % 2 ? " " : "");
+               }
+       }
+       return;
+}
+
+static void
+printf_ether_header(struct ether_header *eh)
+{
+       printf("%02x:%02x:%02x:%02x:%02x:%02x > %02x:%02x:%02x:%02x:%02x:%02x 0x%04x ", 
+                  eh->ether_shost[0], eh->ether_shost[1], eh->ether_shost[2], 
+                  eh->ether_shost[3], eh->ether_shost[4], eh->ether_shost[5], 
+                  eh->ether_dhost[0], eh->ether_dhost[1], eh->ether_dhost[2], 
+                  eh->ether_dhost[3], eh->ether_dhost[4], eh->ether_dhost[5], 
+                  eh->ether_type);
+}
+#endif /* BRIDGE_DEBUG */
+
+/*
+ * bridgeattach:
+ *
+ *     Pseudo-device attach routine.
+ */
+__private_extern__ int
+bridgeattach(__unused int n)
+{
+       int error;
+       lck_grp_attr_t *lck_grp_attr = NULL;
+       
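+       /* Zone for bridge route-table nodes, sized for up to 1024 entries. */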
+       bridge_rtnode_pool = zinit(sizeof(struct bridge_rtnode), 1024 * sizeof(struct bridge_rtnode),
+                               0, "bridge_rtnode");
+       
+       lck_grp_attr = lck_grp_attr_alloc_init();
+       
+       bridge_lock_grp = lck_grp_alloc_init("if_bridge", lck_grp_attr);
+       
+       bridge_lock_attr = lck_attr_alloc_init();
+       
+#if BRIDGE_DEBUG
+       lck_attr_setdebug(bridge_lock_attr);
+#endif
+
+       bridge_list_lock = lck_rw_alloc_init(bridge_lock_grp, bridge_lock_attr);
+       
+       // can free the attributes once we've allocated the group lock
+       lck_grp_attr_free(lck_grp_attr);
+       
+       LIST_INIT(&bridge_list);
+       error = if_clone_attach(&bridge_cloner);
+
+       return error;
+}
+
+#if BRIDGE_DEBUG
+
+static void
+link_print(struct sockaddr_dl * dl_p)
+{
+       int i;
+       
+#if 1
+       printf("sdl len %d index %d family %d type 0x%x nlen %d alen %d"
+           " slen %d addr ", dl_p->sdl_len,
+           dl_p->sdl_index,  dl_p->sdl_family, dl_p->sdl_type,
+           dl_p->sdl_nlen, dl_p->sdl_alen, dl_p->sdl_slen);
+#endif
+       for (i = 0; i < dl_p->sdl_alen; i++)
+        printf("%s%x", i ? ":" : "",
+               (CONST_LLADDR(dl_p))[i]);
+       printf("\n");
+       return;
+}
+#endif /* BRIDGE_DEBUG */
+
+
+/*
+ * bridge_clone_create:
+ *
+ *     Create a new bridge instance.
+ */
+/* APPLE MODIFICATION <cbz@apple.com> - add opaque <const caddr_t params> argument for cloning.  This is done for 
+ net80211's VAP creation (with the Marvell codebase).  I think this could end up being useful
+ for other devices, too.  This is not in an ifdef because it doesn't hurt anything to have 
+ this extra param */
+static int
+bridge_clone_create(struct if_clone *ifc, int unit)
+{
+       struct bridge_softc *sc = NULL;
+       struct ifnet *ifp = NULL;
+       u_char eaddr[6];
+       uint32_t r;
+       struct ifnet_init_params init_params;
+       errno_t error = 0;
+       uint32_t sdl_buffer[offsetof(struct sockaddr_dl, sdl_data) + IFNAMSIZ + ETHER_ADDR_LEN];
+       struct sockaddr_dl *sdl = (struct sockaddr_dl *)sdl_buffer;
+       
+       sc = _MALLOC(sizeof(*sc), M_DEVBUF, M_WAITOK);
+       memset(sc, 0, sizeof(*sc));
+       
+       sc->sc_brtmax = BRIDGE_RTABLE_MAX;
+       /* APPLE MODIFICATION <cbz@apple.com> - add support for Proxy STA */
+#if IEEE80211_PROXYSTA
+       sc->sc_brtmax_proxysta = BRIDGE_RTABLE_MAX_PROXYSTA;
+#endif
+       sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
+       sc->sc_bridge_max_age = BSTP_DEFAULT_MAX_AGE;
+       sc->sc_bridge_hello_time = BSTP_DEFAULT_HELLO_TIME;
+       sc->sc_bridge_forward_delay = BSTP_DEFAULT_FORWARD_DELAY;
+       sc->sc_bridge_priority = BSTP_DEFAULT_BRIDGE_PRIORITY;
+       sc->sc_hold_time = BSTP_DEFAULT_HOLD_TIME;
+       sc->sc_filter_flags = IFBF_FILT_DEFAULT;
+#ifndef BRIDGE_IPF
+       /*
+        * For backwards compatibility with previous behaviour...
+        * Switch off filtering on the bridge itself if BRIDGE_IPF is
+        * not defined.
+        */
+       sc->sc_filter_flags &= ~IFBF_FILT_USEIPF;
+#endif
+       
+       /* Initialize our routing table. */
+       error = bridge_rtable_init(sc);
+       if (error != 0) {
+               printf("bridge_clone_create: bridge_rtable_init failed %d\n", error);
+               goto done;
+       }
+       
+       LIST_INIT(&sc->sc_iflist);
+
+       sc->sc_mtx = lck_mtx_alloc_init(bridge_lock_grp, bridge_lock_attr);
+       
+       /* use the interface name as the unique id for ifp recycle */
+       snprintf(sc->sc_if_xname, sizeof(sc->sc_if_xname), "%s%d",
+             ifc->ifc_name, unit);
+       memset(&init_params, 0, sizeof(struct ifnet_init_params));
+       init_params.uniqueid = sc->sc_if_xname;
+       init_params.uniqueid_len = strlen(sc->sc_if_xname);
+       init_params.name = ifc->ifc_name;
+       init_params.unit = unit;
+       init_params.family = IFNET_FAMILY_ETHERNET;
+       init_params.type = IFT_BRIDGE;
+       init_params.output = bridge_start;
+       init_params.demux = ether_demux;
+       init_params.add_proto = ether_add_proto;
+       init_params.del_proto = ether_del_proto;
+       init_params.check_multi = ether_check_multi;
+       init_params.framer = ether_frameout;
+       init_params.softc = sc;
+       init_params.ioctl = bridge_ioctl;
+       init_params.set_bpf_tap = bridge_set_bpf_tap;
+       init_params.detach = bridge_detach;
+       init_params.broadcast_addr = etherbroadcastaddr;
+       init_params.broadcast_len = ETHER_ADDR_LEN;
+       error = ifnet_allocate(&init_params, &ifp);
+       if (error != 0) {
+               printf("bridge_clone_create: ifnet_allocate failed %d\n", error);
+               goto done;
+       }
+       sc->sc_if = ifp;
+       
+       error = ifnet_set_mtu(ifp, ETHERMTU);
+       if (error != 0) {
+               printf("bridge_clone_create: ifnet_set_mtu failed %d\n", error);
+               goto done;
+       }
+       error = ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
+       if (error != 0) {
+               printf("bridge_clone_create: ifnet_set_addrlen failed %d\n", error);
+               goto done;
+       }
+       error = ifnet_set_baudrate(ifp, 10000000);      // XXX: this is what IONetworking does
+       if (error != 0) {
+               printf("bridge_clone_create: ifnet_set_baudrate failed %d\n", error);
+               goto done;
+       }
+       error = ifnet_set_hdrlen(ifp, ETHER_HDR_LEN);
+       if (error != 0) {
+               printf("bridge_clone_create: ifnet_set_hdrlen failed %d\n", error);
+               goto done;
+       }
+       error = ifnet_set_flags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_NOTRAILERS | IFF_MULTICAST, 
+                                                       0xffff);
+       if (error != 0) {
+               printf("bridge_clone_create: ifnet_set_flags failed %d\n", error);
+               goto done;
+       }
+       
+       /*
+        * Generate a random ethernet address and use the private AC:DE:48
+        * OUI code.
+        */
+       read_random(&r, sizeof(r));
+       eaddr[0] = 0xAC;
+       eaddr[1] = 0xDE;
+       eaddr[2] = 0x48;
+       eaddr[3] = (r >> 0)  & 0xffu;
+       eaddr[4] = (r >> 8)  & 0xffu;
+       eaddr[5] = (r >> 16) & 0xffu;
+       
+       memset(sdl, 0, sizeof(sdl_buffer));
+       sdl->sdl_family = AF_LINK;
+       sdl->sdl_nlen = strlen(sc->sc_if_xname);
+       sdl->sdl_alen = ETHER_ADDR_LEN;
+       sdl->sdl_len = offsetof(struct sockaddr_dl, sdl_data);
+       memcpy(sdl->sdl_data, sc->sc_if_xname, sdl->sdl_nlen);
+       memcpy(LLADDR(sdl), eaddr, ETHER_ADDR_LEN);
+       
+#if BRIDGE_DEBUG
+       link_print(sdl);
+#endif
+
+       error = ifnet_attach(ifp, NULL);
+       if (error != 0) {
+               printf("bridge_clone_create: ifnet_attach failed %d\n", error);
+               goto done;
+       }
+       
+       error = ifnet_set_lladdr_and_type(ifp, eaddr, ETHER_ADDR_LEN, IFT_ETHER);
+       if (error != 0) {
+               printf("bridge_clone_create: ifnet_set_lladdr_and_type failed %d\n", error);
+               goto done;
+       }
+       
+#if APPLE_BRIDGE_HWCKSUM_SUPPORT
+       /* 
+        * APPLE MODIFICATION - our bridge can support HW checksums 
+        * (useful if underlying interfaces support them) on TX,
+        * RX is not that interesting, since the stack just looks to
+        * see if the packet has been checksummed already (I think)
+        * but we might as well indicate we support it
+        */
+       ifp->if_capabilities =
+               IFCAP_CSUM_IPv4_Tx | IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_UDPv4_Tx |
+               IFCAP_CSUM_IPv4_Rx | IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_UDPv4_Rx ;
+#endif
+       
+       lck_rw_lock_exclusive(bridge_list_lock);
+       LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
+       lck_rw_done(bridge_list_lock);
+
+       /* attach as ethernet */
+       error = bpf_attach(ifp, DLT_EN10MB, sizeof(struct ether_header), NULL, NULL);
+       
+done:
+       if (error != 0) {
+        printf("bridge_clone_create failed error %d\n", error);
+               /* Cleanup TBD */
+       }
+       
+       return error;
+}
+
+/*
+ * bridge_clone_destroy:
+ *
+ *     Destroy a bridge instance.
+ */
+static void
+bridge_clone_destroy(struct ifnet *ifp)
+{
+       struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
+       struct bridge_iflist *bif;
+       int error;
+       
+       lck_mtx_lock(sc->sc_mtx);
+       if ((sc->sc_flags & SCF_DETACHING)) {
+               lck_mtx_unlock(sc->sc_mtx);
+               return;
+       }
+       sc->sc_flags |= SCF_DETACHING;
+       
+       bridge_stop(ifp, 1);
+       
+       error = ifnet_set_flags(ifp, 0, IFF_UP);
+       if (error != 0) {
+               printf("bridge_clone_destroy: ifnet_set_flags failed %d\n", error);
+       }
+       
+       while ((bif = LIST_FIRST(&sc->sc_iflist)) != NULL)
+               bridge_delete_member(sc, bif);
+       
+       lck_mtx_unlock(sc->sc_mtx);
+       
+       error = ifnet_detach(ifp);
+       if (error != 0) {
+               printf("bridge_clone_destroy: ifnet_detach failed %d\n", error);
+               if ((sc = (struct bridge_softc *)ifnet_softc(ifp)) != NULL) {
+                       lck_mtx_lock(sc->sc_mtx);
+                       sc->sc_flags &= ~SCF_DETACHING;
+                       lck_mtx_unlock(sc->sc_mtx);
+               }
+       }
+       
+       return;
+}
+
+#define DRVSPEC \
+       if (ifd->ifd_cmd >= bridge_control_table_size) { \
+               error = EINVAL; \
+               break; \
+       } \
+       bc = &bridge_control_table[ifd->ifd_cmd]; \
+        \
+       if ((cmd & IOC_DIRMASK) == IOC_INOUT && \
+               (bc->bc_flags & BC_F_COPYOUT) == 0) { \
+               error = EINVAL; \
+               break; \
+       } \
+       else if (((cmd & IOC_DIRMASK) == IOC_IN) && \
+                        (bc->bc_flags & BC_F_COPYOUT) != 0) { \
+               error = EINVAL; \
+               break; \
+       } \
+        \
+       if (bc->bc_flags & BC_F_SUSER) { \
+               error = kauth_authorize_generic(kauth_cred_get(), KAUTH_GENERIC_ISSUSER); \
+               if (error) \
+                       break; \
+       } \
+        \
+       if (ifd->ifd_len != bc->bc_argsize || \
+               ifd->ifd_len > sizeof(args)) { \
+               error = EINVAL; \
+               break; \
+       } \
+        \
+       memset(&args, 0, sizeof(args)); \
+       if (bc->bc_flags & BC_F_COPYIN) { \
+               error = copyin(ifd->ifd_data, &args, ifd->ifd_len); \
+               if (error) \
+                       break; \
+       } \
+        \
+       lck_mtx_lock(sc->sc_mtx); \
+       error = (*bc->bc_func)(sc, &args); \
+       lck_mtx_unlock(sc->sc_mtx); \
+       if (error) \
+               break; \
+        \
+       if (bc->bc_flags & BC_F_COPYOUT) \
+               error = copyout(&args, ifd->ifd_data, ifd->ifd_len)
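+
+/*
+ * DRVSPEC expands inside the switch in bridge_ioctl() below and relies on
+ * locals declared at each expansion site: `args' (a union sized for the
+ * largest request), `ifd' (the 32- or 64-bit struct ifdrv), the matching
+ * width `bridge_control_table' and `bc', plus `sc', `cmd' and `error'
+ * from the enclosing function.  A sketch of one expansion site, mirroring
+ * the SIOCSDRVSPEC32 case below:
+ *
+ *     case SIOCSDRVSPEC32:
+ *     case SIOCGDRVSPEC32: {
+ *             union { ... } args;
+ *             struct ifdrv32 *ifd = (struct ifdrv32 *)data;
+ *             const struct bridge_control *bridge_control_table =
+ *                 bridge_control_table32, *bc;
+ *             DRVSPEC;
+ *             break;
+ *     }
+ */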
+
+/*
+ * bridge_ioctl:
+ *
+ *     Handle a control request from the operator.
+ */
+static errno_t
+bridge_ioctl(ifnet_t ifp, unsigned long cmd, void *data)
+{
+       struct bridge_softc *sc = ifnet_softc(ifp);
+       struct ifreq *ifr = (struct ifreq *) data;
+       int error = 0;
+       
+       lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_NOTOWNED);
+
+#if BRIDGE_DEBUG
+       printf("bridge_ioctl: ifp %p cmd 0x%08lx (%c%c [%lu] %c %lu)\n", 
+                  ifp, 
+                  cmd, 
+                  (cmd & IOC_IN) ? 'I' : ' ',
+                  (cmd & IOC_OUT) ? 'O' : ' ',
+                  IOCPARM_LEN(cmd),
+                  (char)IOCGROUP(cmd),
+                  cmd & 0xff);
+       printf("SIOCGDRVSPEC32 %lx SIOCGDRVSPEC64 %lx\n", SIOCGDRVSPEC32, SIOCGDRVSPEC64);
+#endif
+       
+       switch (cmd) {
+               case SIOCADDMULTI:
+                       break;
+               case SIOCDELMULTI:
+                       break;
+                       
+               case SIOCSDRVSPEC32:
+               case SIOCGDRVSPEC32: {
+                       union {
+                               struct ifbreq ifbreq;
+                               struct ifbifconf32 ifbifconf;
+                               struct ifbareq32 ifbareq;
+                               struct ifbaconf32 ifbaconf;
+                               struct ifbrparam ifbrparam;
+                       } args;
+                       struct ifdrv32 *ifd = (struct ifdrv32 *) data;
+                       const struct bridge_control *bridge_control_table = bridge_control_table32, *bc;
+                       
+                       DRVSPEC;
+                       
+                       break;
+               }
+               case SIOCSDRVSPEC64:
+               case SIOCGDRVSPEC64: {
+                       union {
+                               struct ifbreq ifbreq;
+                               struct ifbifconf64 ifbifconf;
+                               struct ifbareq64 ifbareq;
+                               struct ifbaconf64 ifbaconf;
+                               struct ifbrparam ifbrparam;
+                       } args;
+                       struct ifdrv64 *ifd = (struct ifdrv64 *) data;
+                       const struct bridge_control *bridge_control_table = bridge_control_table64, *bc;
+                       
+                       DRVSPEC;
+                       
+                       break;
+               }
+                       
+               case SIOCSIFFLAGS:
+                       if ((ifnet_flags(ifp) & (IFF_UP|IFF_RUNNING)) == IFF_RUNNING) {
+                               /*
+                                * If interface is marked down and it is running,
+                                * then stop and disable it.
+                                */
+                               lck_mtx_lock(sc->sc_mtx);
+                               bridge_stop(ifp, 1);
+                               lck_mtx_unlock(sc->sc_mtx);
+                       } else if ((ifnet_flags(ifp) & (IFF_UP|IFF_RUNNING)) == IFF_UP) {
+                               /*
+                                * If interface is marked up and it is stopped, then
+                                * start it.
+                                */
+                               lck_mtx_lock(sc->sc_mtx);
+                               error = bridge_init(ifp);
+                               lck_mtx_unlock(sc->sc_mtx);
+                       }
+                       break;
+                       
+               case SIOCSIFMTU:
+#if 0
+                       /* APPLE MODIFICATION <cbz@apple.com> 
+                        if we wanted to support changing the MTU */
+               {
+                       struct ifreq *ifr = (struct ifreq *)data;
+                       struct bridge_iflist *bif;
+                       struct ifnet *dst_if;
+                       sc->sc_if.if_mtu = ifr->ifr_mtu;
+                       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+                               dst_if = bif->bif_ifp;
+                               error = ifnet_ioctl(dst_if, 0, cmd, data);
+                               if (error)
+                                       break;
+                       }
+               }
+#else
+                       /* Do not allow the MTU to be changed on the bridge */
+                       error = EINVAL;
+#endif
+                       break;
+                       
+                       /* APPLE MODIFICATION - don't pass this down to ether_ioctl, just indicate we don't handle it */
+               case SIOCGIFMEDIA:
+                       error = EINVAL;
+                       break;
+                       
+               case SIOCSIFLLADDR:
+                       error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
+                       if (error != 0)
+                               printf("bridge_ioctl: ifnet_set_lladdr failed %d\n", error);
+                       break;
+                       
+               default:
+                       error = ether_ioctl(ifp, cmd, data);
+#if BRIDGE_DEBUG
+                       if (error != 0)
+                               printf("bridge_ioctl: ether_ioctl ifp %p cmd 0x%08lx (%c%c [%lu] %c %lu) failed error: %d\n", 
+                                          ifp, 
+                                          cmd, 
+                                          (cmd & IOC_IN) ? 'I' : ' ',
+                                          (cmd & IOC_OUT) ? 'O' : ' ',
+                                          IOCPARM_LEN(cmd),
+                                          (char) IOCGROUP(cmd),
+                                          cmd & 0xff,
+                                          error);
+#endif /* BRIDGE_DEBUG */
+                       break;
+       }
+       lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_NOTOWNED);
+       
+       return (error);
+}
+
+/*
+ * bridge_mutecaps:
+ *
+ *     Clear or restore unwanted capabilities on the member interface
+ */
+#if HAS_IF_CAP
+void
+bridge_mutecaps(struct bridge_iflist *bif, int mute)
+{
+       struct ifnet *ifp = bif->bif_ifp;
+       struct ifcapreq ifcr;
+       
+       if (ifp->if_ioctl == NULL)
+               return;
+       
+       memset(&ifcr, 0, sizeof(ifcr));
+       ifcr.ifcr_capenable = ifp->if_capenable;
+       
+       if (mute) {
+               /* mask off and save capabilities */
+               bif->bif_mutecap = ifcr.ifcr_capenable & BRIDGE_IFCAPS_MASK;
+               if (bif->bif_mutecap != 0)
+                       ifcr.ifcr_capenable &= ~BRIDGE_IFCAPS_MASK;
+       } else {
+               /* restore muted capabilities */
+               ifcr.ifcr_capenable |= bif->bif_mutecap;
+       }
+       
+       if (bif->bif_mutecap != 0) {
+               (void) (*ifp->if_ioctl)(ifp, SIOCSIFCAP, (caddr_t)&ifcr);
+       }
+}
+#endif /* HAS_IF_CAP */
+
+/*
+ * bridge_lookup_member:
+ */
+static struct bridge_iflist *
+bridge_lookup_member(struct bridge_softc *sc, const char *name)
+{
+       struct bridge_iflist *bif;
+       struct ifnet *ifp;
+       char if_xname[IFNAMSIZ];
+       
+       lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_OWNED);
+
+       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+               ifp = bif->bif_ifp;
+               snprintf(if_xname, sizeof(if_xname), "%s%d", 
+                 ifnet_name(ifp), ifnet_unit(ifp));
+               if (strncmp(if_xname, name, sizeof(if_xname)) == 0)
+                       return (bif);
+       }
+       
+       return (NULL);
+}
+
+/*
+ * bridge_lookup_member_if:
+ */
+static struct bridge_iflist *
+bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
+{
+       struct bridge_iflist *bif;
+       
+       lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_OWNED);
+
+       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+               if (bif->bif_ifp == member_ifp)
+                       return (bif);
+       }
+       
+       return (NULL);
+}
+
+static errno_t 
+bridge_iff_input(void* cookie, ifnet_t ifp, __unused protocol_family_t protocol,
+                 mbuf_t *data, char **frame_ptr)
+{
+       errno_t error = 0;
+       struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
+       struct bridge_softc *sc = bif->bif_sc;
+       int included = 0;
+       size_t frmlen = 0;
+       mbuf_t m = *data;
+
+       if ((m->m_flags & M_PROTO1))
+               goto out;
+       
+       if (*frame_ptr >= (char *)mbuf_datastart(m) && *frame_ptr <= (char *)mbuf_data(m)) {
+               included = 1;
+               frmlen = (char *)mbuf_data(m) - *frame_ptr;
+       }
+#if BRIDGE_DEBUG
+       if (_if_brige_debug) {
+               printf("bridge_iff_input %s%d from %s%d m %p data %p frame %p %s frmlen %lu\n", 
+                          ifnet_name(sc->sc_if), ifnet_unit(sc->sc_if),
+                          ifnet_name(ifp), ifnet_unit(ifp), 
+                          m, mbuf_data(m), *frame_ptr, included ? "inside" : "outside", frmlen);
+               
+               if (_if_brige_debug > 1) {
+                       printf_mbuf(m, "bridge_iff_input[", "\n");
+                       printf_ether_header((struct ether_header *)*frame_ptr);
+                       printf_mbuf_data(m, 0, 20);
+                       printf("\n");
+               }
+       }
+#endif /* BRIDGE_DEBUG */
+
+       /* Move the data pointer back to the start of the frame, i.e. the link-layer header */
+       if (included) {
+               (void) mbuf_setdata(m, (char *)mbuf_data(m) - frmlen, mbuf_len(m) + frmlen);
+               (void) mbuf_pkthdr_adjustlen(m, frmlen);
+       } else {
+               printf("bridge_iff_input: frame_ptr outside mbuf\n");
+               goto out;
+       }
+       
+       error = bridge_input(bif, ifp, m, *frame_ptr);
+       
+       /* Adjust packet back to original */
+       if (error == 0) {
+               (void) mbuf_setdata(m, (char *)mbuf_data(m) + frmlen, mbuf_len(m) - frmlen);
+               (void) mbuf_pkthdr_adjustlen(m, -frmlen);
+       }
+#if BRIDGE_DEBUG
+       if (_if_brige_debug > 1) {
+               printf("\n");
+               printf_mbuf(m, "bridge_iff_input]", "\n");
+       }
+#endif /* BRIDGE_DEBUG */
+
+out:
+       lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_NOTOWNED);
+       
+       return error;
+}
+
+
+#if BRIDGE_MEMBER_OUT_FILTER
+static errno_t
+bridge_iff_output(void *cookie, ifnet_t ifp, __unused protocol_family_t protocol, mbuf_t *data)
+{
+       errno_t error = 0;
+       struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
+       struct bridge_softc *sc = bif->bif_sc;
+       mbuf_t m = *data;
+       
+       if ((m->m_flags & M_PROTO1))
+               goto out;
+       
+#if BRIDGE_DEBUG
+       if (_if_brige_debug) {
+               printf("bridge_iff_output %s%d from %s%d m %p data %p\n", 
+                               ifnet_name(sc->sc_if), ifnet_unit(sc->sc_if),
+                               ifnet_name(ifp), ifnet_unit(ifp), 
+                               m, mbuf_data(m));
+       }
+#endif /* BRIDGE_DEBUG */
+
+       error = bridge_output(sc, ifp, m);
+       if (error != 0) {
+               printf("bridge_iff_output: bridge_output failed error %d\n", error);
+       }
+
+out:   
+       lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_NOTOWNED);
+
+       return error;
+}
+#endif /* BRIDGE_MEMBER_OUT_FILTER */
+
+
+static void 
+bridge_iff_event(void* cookie, ifnet_t ifp, __unused protocol_family_t protocol,
+                 const struct kev_msg *event_msg)
+{
+       struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
+       
+       if (event_msg->vendor_code == KEV_VENDOR_APPLE && 
+               event_msg->kev_class == KEV_NETWORK_CLASS &&
+               event_msg->kev_subclass == KEV_DL_SUBCLASS) {
+               switch (event_msg->event_code) {
+                       case KEV_DL_IF_DETACHING:
+                               bridge_ifdetach(bif, ifp);
+                               break;
+                               
+                       default:
+                               break;
+               }
+       }               
+}
+
+static void 
+bridge_iff_detached(void* cookie, __unused ifnet_t interface)
+{
+       struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
+       
+       _FREE(bif, M_DEVBUF);
+       
+       return;
+}
+
+/*
+ * bridge_delete_member:
+ *
+ *     Delete the specified member interface.
+ */
+static void
+bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif)
+{
+       struct ifnet *ifs = bif->bif_ifp;
+       
+       switch (ifnet_type(ifs)) {
+               case IFT_ETHER:
+                       /*
+                        * Take the interface out of promiscuous mode.
+                        */
+                       (void) ifnet_set_promiscuous(ifs, 0);
+                       break;
+#if NGIF > 0
+               case IFT_GIF:
+                       break;
+#endif
+               default:
+#ifdef DIAGNOSTIC
+                       panic("bridge_delete_member: impossible");
+#endif
+                       break;
+       }
+
+       ifs->if_bridge = NULL;
+       LIST_REMOVE(bif, bif_next);
+
+       /* Respect lock ordering with DLIL lock */
+       lck_mtx_unlock(sc->sc_mtx);
+       iflt_detach(bif->bif_iff_ref);
+       lck_mtx_lock(sc->sc_mtx);
+       
+       bridge_rtdelete(sc, ifs, IFBF_FLUSHALL);
+       
+       if (ifnet_flags(sc->sc_if) & IFF_RUNNING)
+               bstp_initialization(sc);
+       
+       /* When the last member interface is deleted, revert the MTU */
+       
+       if (LIST_EMPTY(&sc->sc_iflist))
+               (void) ifnet_set_mtu(sc->sc_if, ETHERMTU);
+}
+
+static int
+bridge_ioctl_add(struct bridge_softc *sc, void *arg)
+{
+       struct ifbreq *req = arg;
+       struct bridge_iflist *bif = NULL;
+       struct ifnet *ifs;
+       int error = 0;
+       /* APPLE MODIFICATION <cbz@apple.com> - is this a proxy sta being added? */
+#if IEEE80211_PROXYSTA
+       struct bridge_rtnode *brt;
+#endif
+       
+       error = ifnet_find_by_name(req->ifbr_ifsname, &ifs);
+       if (error || ifs == NULL)
+               return (ENOENT);
+       
+       /* Is the interface already attached to this bridge? */
+       if (ifs->if_bridge == sc)
+               return (EEXIST);
+       
+       if (ifs->if_bridge != NULL)
+               return (EBUSY);
+       
+       /* First added interface resets the MTU */
+       
+       if (LIST_EMPTY(&sc->sc_iflist))
+               (void) ifnet_set_mtu(sc->sc_if, ETHERMTU);
+       
+       if (ifnet_mtu(sc->sc_if) != ifnet_mtu(ifs))
+               return (EINVAL);
+
+       bif = _MALLOC(sizeof(*bif), M_DEVBUF, M_WAITOK|M_ZERO);
+       if (bif == NULL)
+               return (ENOMEM);
+       
+       bif->bif_ifp = ifs;
+       bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
+       bif->bif_priority = BSTP_DEFAULT_PORT_PRIORITY;
+       bif->bif_path_cost = BSTP_DEFAULT_PATH_COST;
+       bif->bif_sc = sc;
+       
+       switch (ifnet_type(ifs)) {
+               case IFT_ETHER:
+                       /*
+                        * Place the interface into promiscuous mode.
+                        */
+                       error = ifnet_set_promiscuous(ifs, 1);
+                       if (error)
+                               goto out;
+#if HAS_IF_CAP
+                       bridge_mutecaps(bif, 1);
+#endif
+                       break;
+#if NGIF > 0
+               case IFT_GIF:
+                       break;
+#endif
+               default:
+                       error = EINVAL;
+                       goto out;
+       }
+       
+       /*
+        * If the LINK0 flag is set, and this is the first member interface,
+        * attempt to inherit its link-layer address.
+        */
+       if ((ifnet_flags(sc->sc_if) & IFF_LINK0) && LIST_EMPTY(&sc->sc_iflist) &&
+           ifnet_type(ifs) == IFT_ETHER) {
+           (void) ifnet_set_lladdr(sc->sc_if, ifnet_lladdr(ifs),
+                                                       ETHER_ADDR_LEN);
+       }
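+       /*
+        * (IFF_LINK0 is presumably toggled from user space, e.g. via
+        * ifconfig's link0 flag on BSD-derived systems.)
+        */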
+       
+       // install an interface filter
+       {
+               struct iff_filter iff;
+               
+               memset(&iff, 0, sizeof(struct iff_filter));
+               
+               iff.iff_cookie = bif;
+               iff.iff_name = "com.apple.kernel.bsd.net.if_bridge";
+               iff.iff_input = bridge_iff_input;
+#if BRIDGE_MEMBER_OUT_FILTER
+               iff.iff_output = bridge_iff_output;
+#endif /* BRIDGE_MEMBER_OUT_FILTER */
+               iff.iff_event = bridge_iff_event;
+               iff.iff_detached = bridge_iff_detached;
+               
+               /* Respect lock ordering with DLIL lock */
+               lck_mtx_unlock(sc->sc_mtx);
+               error = iflt_attach(ifs, &iff, &bif->bif_iff_ref);
+               lck_mtx_lock(sc->sc_mtx);
+               if (error != 0) {
+                       printf("bridge_ioctl_add: iflt_attach failed %d\n", error);
+                       goto out;
+               }
+       }
+       ifs->if_bridge = sc;
+       LIST_INSERT_HEAD(&sc->sc_iflist, bif, bif_next);
+       
+       
+       if (ifnet_flags(sc->sc_if) & IFF_RUNNING)
+               bstp_initialization(sc);
+       else
+               bstp_stop(sc);
+       
+       /* APPLE MODIFICATION <cbz@apple.com> - is this a proxy sta being added? */
+#if IEEE80211_PROXYSTA
+       brt = bridge_rtnode_lookup(sc, ifnet_lladdr(ifs));
+       if (brt) {
+#if DIAGNOSTIC
+               printf( "%s: attach %s to bridge as proxysta for %02x:%02x:%02x:%02x:%02x:%02x discovered on %s\n",
+               __func__, ifs->if_xname, brt->brt_addr[0], brt->brt_addr[1], brt->brt_addr[2], 
+               brt->brt_addr[3], brt->brt_addr[4], brt->brt_addr[5], brt->brt_ifp->if_xname );
+#endif
+               brt->brt_ifp_proxysta = ifs;
+       }
+#endif
+       
+       
+out:
+       if (error) {
+               if (bif != NULL)
+                       _FREE(bif, M_DEVBUF);
+       }
+       return (error);
+}
+
+static int
+bridge_ioctl_del(struct bridge_softc *sc, void *arg)
+{
+       struct ifbreq *req = arg;
+       struct bridge_iflist *bif;
+       
+       bif = bridge_lookup_member(sc, req->ifbr_ifsname);
+       if (bif == NULL)
+               return (ENOENT);
+       
+       bridge_delete_member(sc, bif);
+       
+       return (0);
+}
+
+/* APPLE MODIFICATION <cbz@apple.com> - add support for Proxy STA */
+#if IEEE80211_PROXYSTA
+static int
+bridge_ioctl_purge(struct bridge_softc *sc, void *arg)
+{
+       struct ifbreq *req = arg;
+       struct bridge_iflist *bif;
+       struct ifnet *ifs;
+       
+       bif = bridge_lookup_member(sc, req->ifbr_ifsname);
+       if (bif == NULL)
+               return (ENOENT);
+       
+       ifs = bif->bif_ifp;
+       bridge_rtpurge(sc, ifs);
+       
+       return (0);
+}
+#endif
+
+static int
+bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
+{
+       struct ifbreq *req = arg;
+       struct bridge_iflist *bif;
+       
+       bif = bridge_lookup_member(sc, req->ifbr_ifsname);
+       if (bif == NULL)
+               return (ENOENT);
+       
+       req->ifbr_ifsflags = bif->bif_flags;
+       req->ifbr_state = bif->bif_state;
+       req->ifbr_priority = bif->bif_priority;
+       req->ifbr_path_cost = bif->bif_path_cost;
+       req->ifbr_portno = ifnet_index(bif->bif_ifp) & 0xffff;
+       
+       return (0);
+}
+
+static int
+bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
+{
+       struct ifbreq *req = arg;
+       struct bridge_iflist *bif;
+       
+       bif = bridge_lookup_member(sc, req->ifbr_ifsname);
+       if (bif == NULL)
+               return (ENOENT);
+       
+       if (req->ifbr_ifsflags & IFBIF_STP) {
+               switch (ifnet_type(bif->bif_ifp)) {
+                       case IFT_ETHER:
+                               /* These can do spanning tree. */
+                               break;
+
+                       default:
+                               /* Nothing else can. */
+                               return (EINVAL);
+               }
+       }
+       
+       /* APPLE MODIFICATION <cbz@apple.com> - add support for Proxy STA */
+#if IEEE80211_PROXYSTA
+       if ((bif->bif_flags & IFBIF_PROXYSTA_DISCOVER) && 
+           ((req->ifbr_ifsflags & IFBIF_PROXYSTA_DISCOVER) == 0))
+               bridge_rtpurge(sc, bif->bif_ifp);
+#endif
+       
+       bif->bif_flags = req->ifbr_ifsflags;
+       
+       if (ifnet_flags(sc->sc_if) & IFF_RUNNING)
+               bstp_initialization(sc);
+       
+       /* APPLE MODIFICATION <cbz@apple.com> - add support for Proxy STA */
+#if IEEE80211_PROXYSTA
+       if (bif->bif_flags & IFBIF_PROXYSTA_DISCOVER)
+               bridge_rtdiscovery(sc);
+#endif
+       
+       return (0);
+}
+
+static int
+bridge_ioctl_scache(struct bridge_softc *sc, void *arg)
+{
+       struct ifbrparam *param = arg;
+       
+       sc->sc_brtmax = param->ifbrp_csize;
+       bridge_rttrim(sc);
+       
+       return (0);
+}
+
+static int
+bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
+{
+       struct ifbrparam *param = arg;
+       
+       param->ifbrp_csize = sc->sc_brtmax;
+       
+       return (0);
+}
+
+#define BRIDGE_IOCTL_GIFS \
+       struct bridge_iflist *bif; \
+       struct ifbreq breq; \
+       int count, error = 0; \
+       uint32_t len; \
+       \
+       count = 0; \
+       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) \
+               count++; \
+       \
+       if (bifc->ifbic_len == 0) { \
+               bifc->ifbic_len = sizeof(breq) * count; \
+               return (0); \
+       } \
+       \
+       count = 0; \
+       len = bifc->ifbic_len; \
+       memset(&breq, 0, sizeof breq); \
+       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { \
+               if (len < sizeof(breq)) \
+                       break; \
+       \
+               snprintf(breq.ifbr_ifsname, sizeof(breq.ifbr_ifsname), "%s%d", \
+                 ifnet_name(bif->bif_ifp), ifnet_unit(bif->bif_ifp)); \
+               breq.ifbr_ifsflags = bif->bif_flags; \
+               breq.ifbr_state = bif->bif_state; \
+               breq.ifbr_priority = bif->bif_priority; \
+               breq.ifbr_path_cost = bif->bif_path_cost; \
+               breq.ifbr_portno = ifnet_index(bif->bif_ifp) & 0xffff; \
+               error = copyout(&breq, bifc->ifbic_req + count * sizeof(breq), sizeof(breq)); \
+               if (error) \
+                       break; \
+               count++; \
+               len -= sizeof(breq); \
+       } \
+       \
+       bifc->ifbic_len = sizeof(breq) * count
+
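+/*
+ * BRIDGE_IOCTL_GIFS expands in the two handlers below and relies on a
+ * local `bifc' of the matching width (struct ifbifconf32 or ifbifconf64)
+ * declared at the expansion site; on return it holds the number of bytes
+ * copied out.
+ */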
+
+static int
+bridge_ioctl_gifs64(struct bridge_softc *sc, void *arg)
+{
+       struct ifbifconf64 *bifc = arg;
+       
+       BRIDGE_IOCTL_GIFS;
+
+       return (error);
+}
+
+static int
+bridge_ioctl_gifs32(struct bridge_softc *sc, void *arg)
+{
+       struct ifbifconf32 *bifc = arg;
+
+       BRIDGE_IOCTL_GIFS;
+
+       return (error);
+}
+
+#define BRIDGE_IOCTL_RTS \
+       struct bridge_rtnode *brt; \
+       int count = 0, error = 0; \
+       uint32_t len; \
+       struct timespec now; \
+        \
+       if (bac->ifbac_len == 0) \
+               return (0); \
+        \
+       len = bac->ifbac_len; \
+       LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) { \
+               if (len < sizeof(bareq)) \
+                       goto out; \
+               memset(&bareq, 0, sizeof(bareq)); \
+               snprintf(bareq.ifba_ifsname, sizeof(bareq.ifba_ifsname), "%s%d", \
+                 ifnet_name(brt->brt_ifp), ifnet_unit(brt->brt_ifp)); \
+               memcpy(bareq.ifba_dst, brt->brt_addr, sizeof(brt->brt_addr)); \
+               if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) { \
+                       nanouptime(&now); \
+                       if (brt->brt_expire >= (unsigned long)now.tv_sec) \
+                               bareq.ifba_expire = brt->brt_expire - now.tv_sec; \
+                       else \
+                               bareq.ifba_expire = 0; \
+               } else \
+                       bareq.ifba_expire = 0; \
+               bareq.ifba_flags = brt->brt_flags; \
+                \
+               error = copyout(&bareq, bac->ifbac_req + count * sizeof(bareq), sizeof(bareq)); \
+               if (error) \
+                       goto out; \
+               count++; \
+               len -= sizeof(bareq); \
+       } \
+out: \
+       bac->ifbac_len = sizeof(bareq) * count
+       
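+/*
+ * BRIDGE_IOCTL_RTS likewise relies on locals at the expansion site: `bac'
+ * (struct ifbaconf32 or ifbaconf64) and `bareq' (struct ifbareq32 or
+ * ifbareq64), so the copyout writes address records of the width the
+ * caller requested.
+ */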
+
+static int
+bridge_ioctl_rts64(struct bridge_softc *sc, void *arg)
+{
+       struct ifbaconf64 *bac = arg;
+       struct ifbareq64 bareq;
+       
+       BRIDGE_IOCTL_RTS;
+
+       return (error);
+}
+
+static int
+bridge_ioctl_rts32(struct bridge_softc *sc, void *arg)
+{
+       struct ifbaconf32 *bac = arg;
+       struct ifbareq32 bareq;
+       
+       BRIDGE_IOCTL_RTS;
+
+       return (error);
+}
+
+static int
+bridge_ioctl_saddr64(struct bridge_softc *sc, void *arg)
+{
+       struct ifbareq64 *req = arg;
+       struct bridge_iflist *bif;
+       int error;
+       
+       bif = bridge_lookup_member(sc, req->ifba_ifsname);
+       if (bif == NULL)
+               return (ENOENT);
+       
+       error = bridge_rtupdate(sc, req->ifba_dst, bif->bif_ifp, 1,
+                            req->ifba_flags);
+       
+       return (error);
+}
+
+static int
+bridge_ioctl_saddr32(struct bridge_softc *sc, void *arg)
+{
+       struct ifbareq32 *req = arg;
+       struct bridge_iflist *bif;
+       int error;
+       
+       bif = bridge_lookup_member(sc, req->ifba_ifsname);
+       if (bif == NULL)
+               return (ENOENT);
+       
+       error = bridge_rtupdate(sc, req->ifba_dst, bif->bif_ifp, 1,
+                            req->ifba_flags);
+       
+       return (error);
+}
+
+static int
+bridge_ioctl_sto(struct bridge_softc *sc, void *arg)
+{
+       struct ifbrparam *param = arg;
+       
+       sc->sc_brttimeout = param->ifbrp_ctime;
+       
+       return (0);
+}
+
+static int
+bridge_ioctl_gto(struct bridge_softc *sc, void *arg)
+{
+       struct ifbrparam *param = arg;
+       
+       param->ifbrp_ctime = sc->sc_brttimeout;
+       
+       return (0);
+}
+
+static int
+bridge_ioctl_daddr64(struct bridge_softc *sc, void *arg)
+{
+       struct ifbareq64 *req = arg;
+       
+       return (bridge_rtdaddr(sc, req->ifba_dst));
+}
+
+static int
+bridge_ioctl_daddr32(struct bridge_softc *sc, void *arg)
+{
+       struct ifbareq32 *req = arg;
+       
+       return (bridge_rtdaddr(sc, req->ifba_dst));
+}
+
+static int
+bridge_ioctl_flush(struct bridge_softc *sc, void *arg)
+{
+       struct ifbreq *req = arg;
+       
+       bridge_rtflush(sc, req->ifbr_ifsflags);
+       
+       return (0);
+}
+
+static int
+bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
+{
+       struct ifbrparam *param = arg;
+       
+       param->ifbrp_prio = sc->sc_bridge_priority;
+       
+       return (0);
+}
+
+static int
+bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
+{
+       struct ifbrparam *param = arg;
+       
+       sc->sc_bridge_priority = param->ifbrp_prio;
+       
+       if (ifnet_flags(sc->sc_if) & IFF_RUNNING)
+               bstp_initialization(sc);
+       
+       return (0);
+}
+
+static int
+bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
+{
+       struct ifbrparam *param = arg;
+       
+       param->ifbrp_hellotime = sc->sc_bridge_hello_time >> 8;
+       
+       return (0);
+}
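+
+/*
+ * The STP time parameters (hello time, forward delay, max age) appear to
+ * be kept in units of 1/256 s, matching the 802.1D BPDU encoding; hence
+ * the << 8 / >> 8 conversions to and from whole seconds in these
+ * handlers.
+ */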
+
+static int
+bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
+{
+       struct ifbrparam *param = arg;
+       
+       if (param->ifbrp_hellotime == 0)
+               return (EINVAL);
+       sc->sc_bridge_hello_time = param->ifbrp_hellotime << 8;
+       
+       if (ifnet_flags(sc->sc_if) & IFF_RUNNING)
+               bstp_initialization(sc);
+       
+       return (0);
+}
+
+static int
+bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
+{
+       struct ifbrparam *param = arg;
+       
+       param->ifbrp_fwddelay = sc->sc_bridge_forward_delay >> 8;
+       
+       return (0);
+}
+
+static int
+bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
+{
+       struct ifbrparam *param = arg;
+       
+       if (param->ifbrp_fwddelay == 0)
+               return (EINVAL);
+       sc->sc_bridge_forward_delay = param->ifbrp_fwddelay << 8;
+       
+       if (ifnet_flags(sc->sc_if) & IFF_RUNNING)
+               bstp_initialization(sc);
+       
+       return (0);
+}
+
+static int
+bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
+{
+       struct ifbrparam *param = arg;
+       
+       param->ifbrp_maxage = sc->sc_bridge_max_age >> 8;
+       
+       return (0);
+}
+
+static int
+bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
+{
+       struct ifbrparam *param = arg;
+       
+       if (param->ifbrp_maxage == 0)
+               return (EINVAL);
+       sc->sc_bridge_max_age = param->ifbrp_maxage << 8;
+       
+       if (ifnet_flags(sc->sc_if) & IFF_RUNNING)
+               bstp_initialization(sc);
+       
+       return (0);
+}
+
+static int
+bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
+{
+       struct ifbreq *req = arg;
+       struct bridge_iflist *bif;
+       
+       bif = bridge_lookup_member(sc, req->ifbr_ifsname);
+       if (bif == NULL)
+               return (ENOENT);
+       
+       bif->bif_priority = req->ifbr_priority;
+       
+       if (ifnet_flags(sc->sc_if) & IFF_RUNNING)
+               bstp_initialization(sc);
+       
+       return (0);
+}
+
+/* APPLE MODIFICATION <cbz@apple.com> - add support for Proxy STA */
+#if IEEE80211_PROXYSTA
+static void
+bridge_proxysta_notify_macaddr(struct ifnet *ifp, int op, const uint8_t *mac)
+{
+       struct proxy_sta_event iev;
+       
+       memset(&iev, 0, sizeof(iev));
+       memcpy(iev.iev_addr, mac, ETHER_ADDR_LEN);
+       
+       rt_proxystamsg(ifp, op, &iev, sizeof(iev));
+}
+
+static void
+bridge_proxysta_discover(struct ifnet *ifp, const uint8_t *mac)
+{
+       bridge_proxysta_notify_macaddr( ifp, RTM_PROXYSTA_DISCOVERY, mac );
+}
+
+static void
+bridge_proxysta_idle_timeout(struct ifnet *ifp, const uint8_t *mac)
+{
+       bridge_proxysta_notify_macaddr( ifp, RTM_PROXYSTA_IDLE_TIMEOUT, mac );
+}
+#endif
+
+static int
+bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
+{
+       struct ifbreq *req = arg;
+       struct bridge_iflist *bif;
+       
+       bif = bridge_lookup_member(sc, req->ifbr_ifsname);
+       if (bif == NULL)
+               return (ENOENT);
+       
+       bif->bif_path_cost = req->ifbr_path_cost;
+       
+       if (ifnet_flags(sc->sc_if) & IFF_RUNNING)
+               bstp_initialization(sc);
+       
+       return (0);
+}
+
+/*
+ * bridge_ifdetach:
+ *
+ *     Detach an interface from a bridge.  Called when a member
+ *     interface is detaching.
+ */
+static void
+bridge_ifdetach(struct bridge_iflist *bif, struct ifnet *ifp)
+{
+       struct bridge_softc *sc = bif->bif_sc;
+       struct ifbreq breq;
+       
+       memset(&breq, 0, sizeof(breq));
+       snprintf(breq.ifbr_ifsname, sizeof(breq.ifbr_ifsname),  "%s%d",
+             ifnet_name(ifp), ifnet_unit(ifp));
+       
+       lck_mtx_lock(sc->sc_mtx);
+       
+       (void) bridge_ioctl_del(sc, &breq);
+       
+       lck_mtx_unlock(sc->sc_mtx);
+}
+
+/*
+ * bridge_init:
+ *
+ *     Initialize a bridge interface.
+ */
+static int
+bridge_init(struct ifnet *ifp)
+{
+       struct bridge_softc *sc = ifnet_softc(ifp);
+       struct timespec ts;
+       errno_t error;
+       
+       if (ifnet_flags(ifp) & IFF_RUNNING)
+               return (0);
+       
+       ts.tv_sec = bridge_rtable_prune_period;
+       ts.tv_nsec = 0;
+       bsd_timeout(bridge_timer, sc, &ts);
+       
+       error = ifnet_set_flags(ifp, IFF_RUNNING, IFF_RUNNING);
+       if (error == 0)
+               bstp_initialization(sc);
+       
+       return error;
+}
+
+/*
+ * bridge_stop:
+ *
+ *     Stop the bridge interface.
+ */
+static void
+bridge_stop(struct ifnet *ifp, __unused int disable)
+{
+       struct bridge_softc *sc = ifnet_softc(ifp);
+       
+       if ((ifnet_flags(ifp) & IFF_RUNNING) == 0)
+               return;
+       
+       bsd_untimeout(bridge_timer, sc);
+       bstp_stop(sc);
+               
+       bridge_rtflush(sc, IFBF_FLUSHDYN);
+       
+       (void) ifnet_set_flags(ifp, 0, IFF_RUNNING);
+}
+
+/*
+ * bridge_enqueue:
+ *
+ *     Enqueue a packet on a bridge member interface.
+ *
+ *     Note: this is called both on the input and output path so this routine 
+ *     cannot simply muck with the HW checksum flag. For the time being we
+ *     rely on the caller to do the right thing.
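+ *
+ *     A caller-side sketch (mirroring bridge_start below): an output path
+ *     finalizes any deferred checksums before handing the mbuf here, e.g.
+ *
+ *             if (eh->ether_type == htons(ETHERTYPE_IP))
+ *                     mbuf_outbound_finalize(m, PF_INET,
+ *                         sizeof(struct ether_header));
+ *             bridge_enqueue(sc, dst_if, m);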
+ */
+__private_extern__ void
+bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m)
+{
+       int len, error;
+
+       lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_OWNED);
+
+#if BRIDGE_DEBUG       
+       if (_if_brige_debug)
+               printf("bridge_enqueue sc %s%d to dst_ifp %s%d m %p\n", 
+                       ifnet_name(sc->sc_if), ifnet_unit(sc->sc_if), 
+                       ifnet_name(dst_ifp), ifnet_unit(dst_ifp), m);
+#endif /* BRIDGE_DEBUG */
+        
+       len = m->m_pkthdr.len;
+       m->m_flags |= M_PROTO1;         // set to avoid loops
+       
+       error = ifnet_output_raw(dst_ifp, 0, m);
+       if (error == 0) {
+               (void) ifnet_stat_increment_out(sc->sc_if, 1, len, 0);
+       } else {
+               (void) ifnet_stat_increment_out(sc->sc_if, 0, 0, 1);
+       }
+       
+       return;
+}
+
+
+#if BRIDGE_MEMBER_OUT_FILTER
+
+/*
+ * bridge_output:
+ *
+ *     Send output from a bridge member interface.  This
+ *     performs the bridging function for locally originated
+ *     packets.
+ *
+ *     The mbuf has the Ethernet header already attached.  We must
+ *     enqueue or free the mbuf before returning.
+ */
+static int
+bridge_output(struct bridge_softc *sc, ifnet_t ifp, mbuf_t m)
+{
+       struct ether_header *eh;
+       struct ifnet *dst_if;
+       
+#if BRIDGE_DEBUG
+       if (_if_brige_debug)
+               printf("bridge_output ifp %p %s%d\n", ifp, ifnet_name(ifp), ifnet_unit(ifp));
+#endif /* BRIDGE_DEBUG */
+       
+       if (m->m_len < ETHER_HDR_LEN) {
+               m = m_pullup(m, ETHER_HDR_LEN);
+               if (m == NULL) {
+                       printf("bridge_output ifp %p m_pullup failed\n", ifp);
+                       return EJUSTRETURN;
+               }
+       }
+       
+       eh = mtod(m, struct ether_header *);
+
+       /* APPLE MODIFICATION <jhw@apple.com>
+        * If the packet is an 802.1X ethertype, then only send on the
+        * original output interface.
+        */
+       if (eh->ether_type == htons(ETHERTYPE_PAE)) {
+               dst_if = ifp;
+               goto sendunicast;
+       }
+       
+       /*
+        * If bridge is down, but the original output interface is up,
+        * go ahead and send out that interface.  Otherwise, the packet
+        * is dropped below.
+        */
+       if ((ifnet_flags(sc->sc_if) & IFF_RUNNING) == 0) {
+               dst_if = ifp;
+               goto sendunicast;
+       }
+       
+       lck_mtx_lock(sc->sc_mtx);
+       
+       /*
+        * If the packet is a multicast, or we don't know a better way to
+        * get there, send to all interfaces.
+        */
+       if (ETHER_IS_MULTICAST(eh->ether_dhost))
+               dst_if = NULL;
+       else
+               dst_if = bridge_rtlookup(sc, eh->ether_dhost);
+       if (dst_if == NULL) {
+               struct bridge_iflist *bif;
+               struct mbuf *mc;
+               int used = 0;
+               
+               LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+                       dst_if = bif->bif_ifp;
+                       if ((ifnet_flags(dst_if) & IFF_RUNNING) == 0)
+                               continue;
+                       
+                       /*
+                        * If this is not the original output interface,
+                        * and the interface is participating in spanning
+                        * tree, make sure the port is in a state that
+                        * allows forwarding.
+                        */
+                       if (dst_if != ifp &&
+                               (bif->bif_flags & IFBIF_STP) != 0) {
+                               switch (bif->bif_state) {
+                                       case BSTP_IFSTATE_BLOCKING:
+                                       case BSTP_IFSTATE_LISTENING:
+                                       case BSTP_IFSTATE_DISABLED:
+                                               continue;
+                               }
+                       }
+                       
+                       if (LIST_NEXT(bif, bif_next) == NULL) {
+                               used = 1;
+                               mc = m;
+                       } else {
+                               mc = m_copym(m, 0, M_COPYALL, M_NOWAIT);
+                               if (mc == NULL) {
+                                       printf("bridge_output ifp %p m_copym failed\n", ifp);
+                                       (void) ifnet_stat_increment_out(sc->sc_if, 0, 0, 1);
+                                       continue;
+                               }
+                       }
+                       
+                       bridge_enqueue(sc, dst_if, mc);
+               }
+               if (used == 0) {
+                       printf("bridge_output ifp %p not used\n", ifp);
+                       m_freem(m);
+               }
+               lck_mtx_unlock(sc->sc_mtx);
+               
+               return EJUSTRETURN;
+       }
+       
+sendunicast:
+       /*
+        * XXX Spanning tree consideration here?
+        */
+       
+       if ((ifnet_flags(dst_if) & IFF_RUNNING) == 0) {
+               printf("bridge_output ifp %p dst_if %p not running\n", ifp, dst_if);
+               m_freem(m);
+                               
+               return EJUSTRETURN;
+       }
+       
+       if (dst_if != ifp) {
+               lck_mtx_lock(sc->sc_mtx);
+
+               bridge_enqueue(sc, dst_if, m);
+       
+               lck_mtx_unlock(sc->sc_mtx);
+
+               return EJUSTRETURN;
+       }
+               
+       return (0);
+}
+#endif /* BRIDGE_MEMBER_OUT_FILTER */
+
+#if APPLE_BRIDGE_HWCKSUM_SUPPORT
+static struct mbuf *
+bridge_fix_txcsum(struct mbuf *m)
+{
+       //      Basic testing indicates that the vast majority of packets processed
+       //      here have a separate Ethernet-header mbuf prepended to them (the
+       //      first case below).  The next most common are those where the
+       //      Ethernet and IP/TCP/UDP headers all sit in one mbuf (the second
+       //      case below).  The third case has never been observed in testing,
+       //      but its code also handles the first two cases when they are
+       //      disabled, so it serves as a reasonable general fallback.
+       
+       int amt = ETHER_HDR_LEN;
+       int hlen = M_CSUM_DATA_IPv4_IPHL( m->m_pkthdr.csum_data );
+       int off = M_CSUM_DATA_IPv4_OFFSET( m->m_pkthdr.csum_data );
+       
+       /* 
+        * NOTE we should never get vlan-attached packets here;
+        * support for those COULD be added, but we don't use them
+        * and it really kinda slows things down to worry about them
+        */
+       
+#ifdef DIAGNOSTIC
+       if ( m_tag_find( m, PACKET_TAG_VLAN, NULL ) != NULL )
+       {
+               printf( "bridge: transmitting packet tagged with VLAN?\n" );
+               KASSERT( 0 );
+               m_freem( m );
+               return NULL;
+       }
+#endif
+       
+       if ( m->m_pkthdr.csum_flags & M_CSUM_IPv4 )
+       {
+               amt += hlen;
+       }
+       if ( m->m_pkthdr.csum_flags & M_CSUM_TCPv4 )
+       {
+               amt += off + sizeof( uint16_t );
+       }
+       
+       if ( m->m_pkthdr.csum_flags & M_CSUM_UDPv4 )
+       {
+               amt += off + sizeof( uint16_t );
+       }
+       
+       if ( m->m_len == ETHER_HDR_LEN )
+       {
+               // this is the case where there's an Ethernet header in an mbuf
+        
+               // the first mbuf is the Ethernet header -- just strip it off and do the checksum
+               struct mbuf *m_ip = m->m_next;
+        
+               // set up m_ip so the cksum operations work
+               /* APPLE MODIFICATION 22 Apr 2008 <mvega@apple.com>
+                *  <rdar://5817385> Clear the m_tag list before setting
+                *  M_PKTHDR.
+                *
+                *  If this m_buf chain was extended via M_PREPEND(), then
+                *  m_ip->m_pkthdr is identical to m->m_pkthdr (see
+                *  M_MOVE_PKTHDR()). The only thing preventing access to this
+                *  invalid packet header data is the fact that the M_PKTHDR
+                *  flag is clear, i.e., m_ip->m_flag & M_PKTHDR == 0, but we're
+                *  about to set the M_PKTHDR flag, so to be safe we initialize,
+                *  more accurately, we clear, m_ip->m_pkthdr.tags via
+                *  m_tag_init().
+                *
+                *  Suppose that we do not do this; if m_pullup(), below, fails,
+                *  then m_ip will be freed along with m_ip->m_pkthdr.tags, but
+                *  we will also free m soon after, via m_freem(), and
+                *  consequently attempt to free m->m_pkthdr.tags in the
+                *  process. The problem is that m->m_pkthdr.tags will have
+                *  already been freed by virtue of being equal to
+                *  m_ip->m_pkthdr.tags. Attempts to dereference
+                *  m->m_pkthdr.tags in m_tag_delete_chain() will result in a
+                *  panic.
+                */
+               m_tag_init(m_ip);
+               /* END MODIFICATION */
+               m_ip->m_flags |= M_PKTHDR;
+               m_ip->m_pkthdr.csum_flags = m->m_pkthdr.csum_flags;
+               m_ip->m_pkthdr.csum_data = m->m_pkthdr.csum_data;
+               m_ip->m_pkthdr.len = m->m_pkthdr.len - ETHER_HDR_LEN;
+        
+               // set up the header mbuf so we can prepend it back on again later
+               m->m_pkthdr.csum_flags = 0;
+               m->m_pkthdr.csum_data = 0;
+               m->m_pkthdr.len = ETHER_HDR_LEN;
+               m->m_next = NULL;
+        
+        
+               // now do the checksums we need -- first IP
+               if ( m_ip->m_pkthdr.csum_flags & M_CSUM_IPv4 )
+               {
+                       // make sure the IP header (or at least the part with the cksum) is there
+                       m_ip = m_pullup( m_ip, sizeof( struct ip ) );
+                       if ( m_ip == NULL )
+                       {
+                               printf( "bridge: failed to flatten header\n ");
+                               m_freem( m );
+                               return NULL;
+                       }
+                       
+                       // now do the checksum
+                       {
+                               struct ip *ip = mtod( m_ip, struct ip* );
+                               ip->ip_sum = in_cksum( m_ip, hlen );
+                
+#ifdef VERY_VERY_VERY_DIAGNOSTIC
+                               printf( "bridge: performed IPv4 checksum\n" );
+#endif
+                       }
+               }
+        
+               // now do a TCP or UDP delayed checksum
+               if ( m_ip->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4) )
+               {
+                       in_delayed_cksum( m_ip );
+            
+#ifdef VERY_VERY_VERY_DIAGNOSTIC
+                       printf( "bridge: performed TCPv4/UDPv4 checksum\n" );
+#endif
+               }
+        
+               // now attach the ethernet header back onto the IP packet
+               m->m_next = m_ip;
+               m->m_pkthdr.len += m_length( m_ip );    
+        
+               // clear the M_PKTHDR flags on the ip packet (again, we re-attach later)
+               m_ip->m_flags &= ~M_PKTHDR;
+        
+               // and clear any csum flags
+               m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_IPv4);
+       }
+       else if ( m->m_len >= amt )
+       {
+               // everything fits in the first mbuf, so futz with m->m_data, m->m_len and m->m_pkthdr.len to
+               // make it work
+               m->m_len -= ETHER_HDR_LEN;
+               m->m_data += ETHER_HDR_LEN;
+               m->m_pkthdr.len -= ETHER_HDR_LEN;
+        
+               // now do the checksums we need -- first IP
+               if ( m->m_pkthdr.csum_flags & M_CSUM_IPv4 )
+               {
+                       struct ip *ip = mtod( m, struct ip* );
+                       ip->ip_sum = in_cksum( m, hlen );
+            
+#ifdef VERY_VERY_VERY_DIAGNOSTIC
+                       printf( "bridge: performed IPv4 checksum\n" );
+#endif
+               }
+        
+               // now do a TCP or UDP delayed checksum
+               if ( m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4) )
+               {
+                       in_delayed_cksum( m );
+            
+#ifdef VERY_VERY_VERY_DIAGNOSTIC
+                       printf( "bridge: performed TCPv4/UDPv4 checksum\n" );
+#endif
+               }
+               
+               // now stick the ethernet header back on
+               m->m_len += ETHER_HDR_LEN;
+               m->m_data -= ETHER_HDR_LEN;
+               m->m_pkthdr.len += ETHER_HDR_LEN;
+        
+               // and clear any csum flags
+               m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_IPv4);
+       }
+       else
+       {
+               struct mbuf *m_ip;
+        
+               // general case -- need to simply split it off and deal
+        
+               // first, calculate how much needs to be made writable (we may have a read-only mbuf here)
+               hlen = M_CSUM_DATA_IPv4_IPHL( m->m_pkthdr.csum_data );
+#if PARANOID
+               off = M_CSUM_DATA_IPv4_OFFSET( m->m_pkthdr.csum_data );
+               
+               if ( m->m_pkthdr.csum_flags & M_CSUM_IPv4 )
+               {
+                       amt += hlen;
+               }
+               
+               if ( m->m_pkthdr.csum_flags & M_CSUM_TCPv4 )
+               {
+                       amt += sizeof( struct tcphdr );
+                       amt += off;
+               }
+               
+               if ( m->m_pkthdr.csum_flags & M_CSUM_UDPv4 )
+               {
+                       amt += sizeof( struct udphdr );
+                       amt += off;
+               }
+#endif
+        
+               // now split the ethernet header off of the IP packet (we'll re-attach later)
+               m_ip = m_split( m, ETHER_HDR_LEN, M_NOWAIT );
+               if ( m_ip == NULL )
+               {
+                       printf( "bridge_fix_txcsum: could not split ether header\n" );
+            
+                       m_freem( m );
+                       return NULL;
+               }
+        
+#if PARANOID
+               // make sure that the IP packet is writable for the portion we need
+               if ( m_makewritable( &m_ip, 0, amt, M_DONTWAIT ) != 0 )
+               {
+                       printf( "bridge_fix_txcsum: could not make %d bytes writable\n", amt );
+            
+                       m_freem( m );
+                       m_freem( m_ip );
+                       return NULL;
+               }
+#endif
+               
+               m_ip->m_pkthdr.csum_flags = m->m_pkthdr.csum_flags;
+               m_ip->m_pkthdr.csum_data = m->m_pkthdr.csum_data;
+        
+               m->m_pkthdr.csum_flags = 0;
+               m->m_pkthdr.csum_data = 0;
+        
+               // now do the checksums we need -- first IP
+               if ( m_ip->m_pkthdr.csum_flags & M_CSUM_IPv4 )
+               {
+                       // make sure the IP header (or at least the part with the cksum) is there
+                       m_ip = m_pullup( m_ip, sizeof( struct ip ) );
+                       if ( m_ip == NULL )
+                       {
+                               printf( "bridge: failed to flatten header\n ");
+                               m_freem( m );
+                               return NULL;
+                       }
+                       
+                       // now do the checksum
+                       {
+                               struct ip *ip = mtod( m_ip, struct ip* );
+                               ip->ip_sum = in_cksum( m_ip, hlen );
+                
+#ifdef VERY_VERY_VERY_DIAGNOSTIC
+                               printf( "bridge: performed IPv4 checksum\n" );
+#endif
+                       }
+               }
+        
+               // now do a TCP or UDP delayed checksum
+               if ( m_ip->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4) )
+               {
+                       in_delayed_cksum( m_ip );
+            
+#ifdef VERY_VERY_VERY_DIAGNOSTIC
+                       printf( "bridge: performed TCPv4/UDPv4 checksum\n" );
+#endif
+               }
+        
+               // now attach the ethernet header back onto the IP packet
+               m->m_next = m_ip;
+               m->m_pkthdr.len += m_length( m_ip );    
+        
+               // clear the M_PKTHDR flags on the ip packet (again, we re-attach later)
+               m_ip->m_flags &= ~M_PKTHDR;
+        
+               // and clear any csum flags
+               m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_IPv4);
+       }
+       
+       return m;
+}
+#endif
+
+/*
+ * bridge_start:
+ *
+ *     Start output on a bridge.
+ */
+static errno_t
+bridge_start(ifnet_t ifp, mbuf_t m)
+{
+       struct bridge_softc *sc = ifnet_softc(ifp);
+       struct ether_header *eh;
+       struct ifnet *dst_if;
+       
+       lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_NOTOWNED);
+
+       eh = mtod(m, struct ether_header *);
+       
+       if ((m->m_flags & (M_BCAST|M_MCAST)) == 0 &&
+               (dst_if = bridge_rtlookup(sc, eh->ether_dhost)) != NULL) {
+               
+               {
+#if APPLE_BRIDGE_HWCKSUM_SUPPORT
+                       /* 
+                        * APPLE MODIFICATION - if the packet needs a checksum (i.e., 
+                        * checksum has been deferred for HW support) AND the destination
+                        * interface doesn't support HW checksums, then we 
+                        * need to fix-up the checksum here
+                        */
+                       if (
+                               ( (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_IPv4) ) != 0 ) &&
+                               ( (dst_if->if_csum_flags_tx & m->m_pkthdr.csum_flags ) != m->m_pkthdr.csum_flags )
+                               )
+                       {
+                               m = bridge_fix_txcsum( m );
+                               if ( m == NULL )
+                               {
+                                       goto done;
+                               }
+                       }
+                       
+#else
+                       if (eh->ether_type == htons(ETHERTYPE_IP))
+                               mbuf_outbound_finalize(m, PF_INET, sizeof(struct ether_header));
+                       else
+                               m->m_pkthdr.csum_flags = 0;
+#endif
+                       lck_mtx_lock(sc->sc_mtx);
+                       #if NBPFILTER > 0
+                               if (sc->sc_bpf_output)
+                                       bridge_bpf_output(ifp, m);
+                       #endif
+                       bridge_enqueue(sc, dst_if, m);
+                       lck_mtx_unlock(sc->sc_mtx);
+               }
+       } else {
+#if APPLE_BRIDGE_HWCKSUM_SUPPORT
+               
+               /* 
+                * APPLE MODIFICATION - if the MULTICAST packet needs a checksum (i.e., 
+                * checksum has been deferred for HW support) AND at least one destination
+                * interface doesn't support HW checksums, then we go ahead and fix it up
+                * here, since it doesn't make sense to do it more than once
+                */
+               
+               if (
+                       (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_IPv4)) &&
+                       /*
+                        * XXX FIX ME: keep track of whether or not we have any interfaces that 
+                        * do not support checksums (for now, assume we do)
+                        */
+                       ( 1 )
+                       )
+               {
+                       m = bridge_fix_txcsum( m );
+                       if ( m == NULL )
+                       {
+                               goto done;
+                       }
+               }
+#else
+               if (eh->ether_type == htons(ETHERTYPE_IP))
+                       mbuf_outbound_finalize(m, PF_INET, sizeof(struct ether_header));
+               else
+                       m->m_pkthdr.csum_flags = 0;
+#endif
+               
+               lck_mtx_lock(sc->sc_mtx);
+               #if NBPFILTER > 0
+                       if (sc->sc_bpf_output)
+                               bridge_bpf_output(ifp, m);
+               #endif
+               bridge_broadcast(sc, ifp, m, 0);
+               lck_mtx_unlock(sc->sc_mtx);
+       }
+#if APPLE_BRIDGE_HWCKSUM_SUPPORT
+done:
+#endif
+
+       return 0;
+}
+
+/*
+ * bridge_forward:
+ *
+ *     The forwarding function of the bridge.
+ */
+static void
+bridge_forward(struct bridge_softc *sc, struct mbuf *m)
+{
+       struct bridge_iflist *bif;
+       struct ifnet *src_if, *dst_if;
+       struct ether_header *eh;
+
+       lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_OWNED);
+
+#if BRIDGE_DEBUG
+       if (_if_brige_debug)
+        printf("bridge_forward %s%d m%p\n", ifnet_name(sc->sc_if), ifnet_unit(sc->sc_if), m);
+#endif /* BRIDGE_DEBUG */
+       
+       src_if = m->m_pkthdr.rcvif;
+       
+       (void) ifnet_stat_increment_in(sc->sc_if, 1, m->m_pkthdr.len, 0);
+       
+       /*
+        * Look up the bridge_iflist.
+        */
+       bif = bridge_lookup_member_if(sc, src_if);
+       if (bif == NULL) {
+               /* Interface is not a bridge member (anymore?) */
+               m_freem(m);
+               return;
+       }
+       
+       /* APPLE MODIFICATION <cbz@apple.com> - add the ability to block forwarding of packets, for the guest network */
+#if ( APPLE_HAVE_80211_GUEST_NETWORK )
+       if (bif->bif_flags & IFBIF_NO_FORWARDING) {
+               /* Drop the packet and we're done. */
+               m_freem(m);
+               return;
+       }
+#endif
+       
+       if (bif->bif_flags & IFBIF_STP) {
+               switch (bif->bif_state) {
+            case BSTP_IFSTATE_BLOCKING:
+            case BSTP_IFSTATE_LISTENING:
+            case BSTP_IFSTATE_DISABLED:
+                m_freem(m);
+                return;
+               }
+       }
+       
+       eh = mtod(m, struct ether_header *);
+       
+       /*
+        * If the interface is learning, and the source
+        * address is valid and not multicast, record
+        * the address.
+        */
+       if ((bif->bif_flags & IFBIF_LEARNING) != 0 &&
+           ETHER_IS_MULTICAST(eh->ether_shost) == 0 &&
+           (eh->ether_shost[0] | eh->ether_shost[1] |
+            eh->ether_shost[2] | eh->ether_shost[3] |
+            eh->ether_shost[4] | eh->ether_shost[5]) != 0) {
+               (void) bridge_rtupdate(sc, eh->ether_shost,
+                               src_if, 0, IFBAF_DYNAMIC);
+       }
+       
+       if ((bif->bif_flags & IFBIF_STP) != 0 &&
+           bif->bif_state == BSTP_IFSTATE_LEARNING) {
+               m_freem(m);
+               return;
+       }
+       
+       /*
+        * At this point, the port either doesn't participate
+        * in spanning tree or it is in the forwarding state.
+        */
+       
+       /*
+        * If the packet is unicast, destined for someone on
+        * "this" side of the bridge, drop it.
+        */
+       if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) {
+        /* APPLE MODIFICATION <cbz@apple.com> - if the packet came in on a proxy sta discovery interface,
+         we need to not look up the node by DA of the packet; we need to look up the proxy sta which 
+         matches the SA.  If it's not found yet, drop the packet. */
+#if IEEE80211_PROXYSTA
+               if (bif->bif_flags & IFBIF_PROXYSTA_DISCOVER)
+               {
+                       struct bridge_rtnode *brt;
+                       dst_if = NULL;
+                       brt = bridge_rtnode_lookup(sc, eh->ether_shost);
+                       if (brt) {
+                               dst_if = brt->brt_ifp_proxysta;
+                       }
+                       if (dst_if == NULL) {
+                               m_freem(m);
+                               return;
+                       }
+               }
+               else
+#endif 
+            dst_if = bridge_rtlookup(sc, eh->ether_dhost);
+               if (src_if == dst_if) {
+                       m_freem(m);
+                       return;
+               }
+       } else {
+               /* ...forward it to all interfaces. */
+               sc->sc_if->if_imcasts++;
+               dst_if = NULL;
+       }
+       
+       /* APPLE MODIFICATION
+     <rnewberry@apple.com>     - this is now handled by bridge_input
+     <cbz@apple.com>           - turning this back on because not all packets
+     are bpf_mtap'd equally.  RSN Preauth frames were not getting through, so
+     we're conditionalizing this call on
+     (eh->ether_type == htons(ETHERTYPE_RSN_PREAUTH))
+     */
+#if 1
+       if (eh->ether_type == htons(ETHERTYPE_RSN_PREAUTH))
+       {
+        m->m_pkthdr.rcvif = sc->sc_if;
+#if NBPFILTER > 0
+        if (sc->sc_bpf_input)
+            bridge_bpf_input(sc->sc_if, m);
+#endif
+       }
+#endif
+        
+       if (dst_if == NULL) {
+        
+#if APPLE_BRIDGE_HWCKSUM_SUPPORT
+        /*
+         * Clear any in-bound checksum flags for this packet.
+         */
+        m->m_pkthdr.csum_flags = 0;
+#else
+               mbuf_inbound_modified(m);
+#endif
+        
+        bridge_broadcast(sc, src_if, m, 1);
+        return;
+       }
+       
+       /*
+        * At this point, we're dealing with a unicast frame
+        * going to a different interface.
+        */
+       if ((ifnet_flags(dst_if) & IFF_RUNNING) == 0) {
+               m_freem(m);
+               return;
+       }
+       bif = bridge_lookup_member_if(sc, dst_if);
+       if (bif == NULL) {
+               /* Not a member of the bridge (anymore?) */
+               m_freem(m);
+               return;
+       }
+       
+       if (bif->bif_flags & IFBIF_STP) {
+               switch (bif->bif_state) {
+            case BSTP_IFSTATE_DISABLED:
+            case BSTP_IFSTATE_BLOCKING:
+                m_freem(m);
+                return;
+               }
+       }
+        
+#if APPLE_BRIDGE_HWCKSUM_SUPPORT
+       /*
+        * Clear any in-bound checksum flags for this packet.
+        */
+       {
+               m->m_pkthdr.csum_flags = 0;
+       }
+#else
+       mbuf_inbound_modified(m);
+#endif
+       
+       bridge_enqueue(sc, dst_if, m);
+}
+
+char *ether_ntop(char *, size_t, const u_char *);
+
+__private_extern__ char *
+ether_ntop(char *buf, size_t len, const u_char *ap)
+{
+       snprintf(buf, len, "%02x:%02x:%02x:%02x:%02x:%02x", 
+                        ap[0], ap[1], ap[2], ap[3], ap[4], ap[5]);
+       
+       return buf;
+}
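+
+/*
+ * Example (hypothetical caller): formatting a destination address for a
+ * debug message.  The buffer must hold at least
+ * sizeof("xx:xx:xx:xx:xx:xx") bytes:
+ *
+ *     char addr[sizeof("XX:XX:XX:XX:XX:XX")];
+ *     printf("dst %s\n", ether_ntop(addr, sizeof(addr), eh->ether_dhost));
+ */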
+
+/*
+ * bridge_input:
+ *
+ *     Receive input from a member interface.  Queue the packet for
+ *     bridging if it is not for us.
+ */
+errno_t
+bridge_input(struct bridge_iflist *bif, struct ifnet *ifp, struct mbuf *m, void *frame_header)
+{
+       struct ifnet *bifp;
+       struct ether_header *eh;
+       struct mbuf *mc;
+       int is_for_us = 0;
+       struct bridge_softc *sc = bif->bif_sc;
+       struct bridge_iflist *brm;
+       
+#if BRIDGE_DEBUG
+       if (_if_brige_debug)
+               printf("bridge_input: %s%d from %s%d m %p data %p\n", 
+                          ifnet_name(sc->sc_if), ifnet_unit(sc->sc_if),
+                          ifnet_name(ifp), ifnet_unit(ifp), 
+                          m, mbuf_data(m));
+#endif /* BRIDGE_DEBUG */
+
+       if ((ifnet_flags(sc->sc_if) & IFF_RUNNING) == 0) {
+#if BRIDGE_DEBUG
+               if (_if_brige_debug)
+                       printf( "bridge_input: %s%d not running passing along\n",
+                                  ifnet_name(sc->sc_if), ifnet_unit(sc->sc_if));
+#endif /* BRIDGE_DEBUG */
+               return 0;
+       }
+       
+       /* Need to clear the promiscuous flag, otherwise the packet will be dropped by DLIL after processing filters */
+       if ((mbuf_flags(m) & MBUF_PROMISC))
+               mbuf_setflags_mask(m, 0, MBUF_PROMISC);
+       
+       lck_mtx_lock(sc->sc_mtx);
+       
+       bifp = sc->sc_if;
+       
+       /* Is it a good idea to reassign a new value to bif? TBD */
+       bif = bridge_lookup_member_if(sc, ifp);
+       if (bif == NULL) {
+               lck_mtx_unlock(sc->sc_mtx);
+#if BRIDGE_DEBUG
+               if (_if_brige_debug)
+                       printf( "bridge_input: %s%d bridge_lookup_member_if failed\n",
+                                  ifnet_name(sc->sc_if), ifnet_unit(sc->sc_if));
+#endif /* BRIDGE_DEBUG */
+               return 0;
+       }
+       
+       eh = (struct ether_header *)mbuf_data(m);
+       
+       /*
+        * If the packet is for us, set the packets source as the
+        * bridge, and return the packet back to ether_input for
+        * local processing.
+        */
+       if (memcmp(eh->ether_dhost, ifnet_lladdr(bifp),
+                          ETHER_ADDR_LEN) == 0) {
+               
+               /* Mark the packet as arriving on the bridge interface */
+               (void) mbuf_pkthdr_setrcvif(m, bifp);
+               mbuf_pkthdr_setheader(m, frame_header);
+               
+               /*
+                * If the interface is learning, and the source
+                * address is valid and not multicast, record
+                * the address.
+                */
+               if ((bif->bif_flags & IFBIF_LEARNING) != 0 &&
+                       ETHER_IS_MULTICAST(eh->ether_shost) == 0 &&
+                       (eh->ether_shost[0] | eh->ether_shost[1] |
+                        eh->ether_shost[2] | eh->ether_shost[3] |
+                        eh->ether_shost[4] | eh->ether_shost[5]) != 0) {
+                               (void) bridge_rtupdate(sc, eh->ether_shost,
+                                                                          ifp, 0, IFBAF_DYNAMIC);
+                       }
+               
+#if NBPFILTER > 0
+               if (sc->sc_bpf_input)
+                       bridge_bpf_input(bifp, m);
+#endif
+               
+               (void) mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN, mbuf_len(m) - ETHER_HDR_LEN);
+               (void) mbuf_pkthdr_adjustlen(m, - ETHER_HDR_LEN);
+               
+               (void) ifnet_stat_increment_in(bifp, 1, mbuf_pkthdr_len(m), 0);
+
+               lck_mtx_unlock(sc->sc_mtx);
+                               
+#if BRIDGE_DEBUG
+               if (_if_brige_debug)
+                       printf( "bridge_input: %s%d packet for bridge\n",
+                                  ifnet_name(sc->sc_if), ifnet_unit(sc->sc_if));
+#endif /* BRIDGE_DEBUG */
+               
+               dlil_input_packet_list(bifp, m);
+               
+               return EJUSTRETURN;
+       }
+       
+       /*
+        * If the packet is destined for the MAC address of the member
+        * interface itself, then we don't need to forward it -- just pass
+        * it back.  Note that it'll likely just be dropped by the stack,
+        * but if something else is bound to the interface directly (for
+        * example, the wireless stats protocol -- although that actually
+        * uses BPF right now), then it will consume the packet.
+        *
+        * ALSO, note that we do this check AFTER checking for the
+        * bridge's own MAC address, because the bridge may be
+        * using the SAME MAC address as one of its interfaces.
+        */
+       if (memcmp(eh->ether_dhost, ifnet_lladdr(ifp),
+                          ETHER_ADDR_LEN) == 0) {
+               /* APPLE MODIFICATION <cbz@apple.com> - add support for Proxy STA */
+#if IEEE80211_PROXYSTA
+               if ((bif->bif_flags & IFBIF_PROXYSTA) == 0) {
+#endif
+                       
+#ifdef VERY_VERY_VERY_DIAGNOSTIC
+                       printf("bridge_input: not forwarding packet bound for member interface\n" );
+#endif
+                       lck_mtx_unlock(sc->sc_mtx);
+                       return 0;
+                       /* APPLE MODIFICATION <cbz@apple.com> - add support for Proxy STA */
+#if IEEE80211_PROXYSTA
+               }
+#if VERY_VERY_VERY_DIAGNOSTIC
+               else {
+                       printf( "%s: pkt rx on %s [proxysta iface], da is %02x:%02x:%02x:%02x:%02x:%02x\n",
+                                  __func__, ifp->if_xname, eh->ether_dhost[0], eh->ether_dhost[1], eh->ether_dhost[2], 
+                                  eh->ether_dhost[3], eh->ether_dhost[4], eh->ether_dhost[5] );
+               }
+#endif
+#endif
+       }
+       
+       if ((m->m_flags & (M_BCAST|M_MCAST))) {
+               struct ifmultiaddr *ifma = NULL;
+               
+               if ((m->m_flags & M_BCAST)) {
+                       is_for_us = 1;
+               } else {
+#if BRIDGE_DEBUG
+                       printf("mulicast: %02x:%02x:%02x:%02x:%02x:%02x\n",
+                                  eh->ether_dhost[0], eh->ether_dhost[1], eh->ether_dhost[2], 
+                                  eh->ether_dhost[3], eh->ether_dhost[4], eh->ether_dhost[5]);
+                       
+                       for (ifma = bifp->if_multiaddrs.lh_first; ifma;
+                                ifma = ifma->ifma_link.le_next) {
+                               
+                               if (ifma->ifma_addr == NULL)
+                                       printf("  <none> ");
+                               else if (ifma->ifma_addr->sa_family == AF_INET) {
+                                       struct sockaddr_in *sin = (struct sockaddr_in *)ifma->ifma_addr;
+                                       
+                                       printf("  %u.%u.%u.%u ",
+                                                  (sin->sin_addr.s_addr & 0xff000000) >> 24,
+                                                  (sin->sin_addr.s_addr & 0x00ff0000) >> 16,
+                                                  (sin->sin_addr.s_addr & 0x0000ff00) >> 8,
+                                                  (sin->sin_addr.s_addr & 0x000000ff));
+                               }
+                               if (!ifma->ifma_ll || !ifma->ifma_ll->ifma_addr)
+                                       printf("<none>\n");
+                               else {
+                                       struct sockaddr_dl *sdl = (struct sockaddr_dl *)ifma->ifma_ll->ifma_addr;
+                                       
+                                       printf("%02x:%02x:%02x:%02x:%02x:%02x\n",
+                                                  CONST_LLADDR(sdl)[0], CONST_LLADDR(sdl)[1], CONST_LLADDR(sdl)[2], 
+                                                  CONST_LLADDR(sdl)[3], CONST_LLADDR(sdl)[4], CONST_LLADDR(sdl)[5]);
+                                       
+                               }
+                       }
+#endif /* BRIDGE_DEBUG */
+                       
+                       /*
+                        * The upper layers of the stack have attached a list of
+                        * multicast addresses to the bridge itself (for example, the
+                        * IP stack has bound 01:00:5e:00:00:01 to the 224.0.0.1
+                        * all-hosts address), since the IP stack is bound to the
+                        * bridge.  So we need to see if packets arriving here SHOULD
+                        * be passed up as coming from the bridge.
+                        *
+                        * Furthermore, since we know the IP stack is attached to the
+                        * bridge, and NOTHING is attached to the underlying devices
+                        * themselves, we can drop packets that don't need to go up
+                        * (by returning NULL from bridge_input to the caller) after
+                        * we forward the packet to other interfaces.
+                        */
+                       
+                       for (ifma = bifp->if_multiaddrs.lh_first; ifma;
+                                ifma = ifma->ifma_link.le_next) {
+                               if (ifma->ifma_ll && ifma->ifma_ll->ifma_addr) {
+                                       struct sockaddr_dl *sdl = (struct sockaddr_dl *)ifma->ifma_ll->ifma_addr;
+                                       
+                                       if (memcmp(eh->ether_dhost, CONST_LLADDR(sdl), ETHER_ADDR_LEN) == 0)
+                                               break;
+                               }
+                       }
+                       if (ifma != NULL) {
+                               /* this packet matches the bridge's own filter, so pass it up as coming from us */
+                               
+                               /* Mark the packet as arriving on the bridge interface */
+                               // don't do this until AFTER we forward the packet -- bridge_forward uses this information
+                               //m->m_pkthdr.rcvif = bifp;
+                               
+                               /* keep track of this to help us decide about forwarding */
+                               is_for_us = 1;
+                               
+#if BRIDGE_DEBUG
+                               char    addr[sizeof("XX:XX:XX:XX:XX:XX")+1];
+                               printf( "bridge_input: multicast frame for us (%s)\n",
+                                          ether_ntop(addr, sizeof(addr), eh->ether_dhost) );
+#endif
+                       } else {
+#if BRIDGE_DEBUG
+                               char    addr[sizeof("XX:XX:XX:XX:XX:XX")+1];
+                               printf( "bridge_input: multicast frame for unbound address (%s), forwarding but not passing to stack\n",
+                                          ether_ntop(addr, sizeof(addr), eh->ether_dhost) );
+#endif
+                       }
+               }
+               /* Tap off 802.1D packets; they do not get forwarded. */
+               if (memcmp(eh->ether_dhost, bstp_etheraddr,
+                                  ETHER_ADDR_LEN) == 0) {
+                       m = bstp_input(sc, ifp, m);
+                       if (m == NULL) {
+                               lck_mtx_unlock(sc->sc_mtx);
+#if BRIDGE_DEBUG
+                               if (_if_brige_debug)
+                                       printf( "bridge_input: %s%d mcast BSTP not forwarded\n",
+                                                  ifnet_name(sc->sc_if), ifnet_unit(sc->sc_if));
+#endif /* BRIDGE_DEBUG */
+                               return EJUSTRETURN;
+                       }
+               }
+               
+               if (bif->bif_flags & IFBIF_STP) {
+                       switch (bif->bif_state) {
+                               case BSTP_IFSTATE_BLOCKING:
+                               case BSTP_IFSTATE_LISTENING:
+                               case BSTP_IFSTATE_DISABLED:
+                               {
+                                       lck_mtx_unlock(sc->sc_mtx);
+                                       
+#if BRIDGE_DEBUG
+                                       if (_if_brige_debug)
+                                               printf( "bridge_input: %s%d mcast bridge not learning or forwarding \n",
+                                                          ifnet_name(sc->sc_if), ifnet_unit(sc->sc_if));
+#endif /* BRIDGE_DEBUG */
+                                       
+                                       m_freem(m);
+                                       return EJUSTRETURN;
+                               }
+                       }
+               }
+               
+               /*
+                * If the interface is learning, and the source
+                * address is valid and not multicast, record
+                * the address.
+                */
+               if ((bif->bif_flags & IFBIF_LEARNING) != 0 &&
+                       ETHER_IS_MULTICAST(eh->ether_shost) == 0 &&
+                       (eh->ether_shost[0] | eh->ether_shost[1] |
+                        eh->ether_shost[2] | eh->ether_shost[3] |
+                        eh->ether_shost[4] | eh->ether_shost[5]) != 0) {
+                               (void) bridge_rtupdate(sc, eh->ether_shost,
+                                                                          ifp, 0, IFBAF_DYNAMIC);
+                       }
+               
+               if (is_for_us) {
+                       /*
+                        * Make a deep copy of the packet and enqueue the copy
+                        * for bridge processing; return the original packet for
+                        * local processing.
+                        */
+                       mc = m_dup(m, M_NOWAIT);
+                       if (mc == NULL) {
+#ifdef DIAGNOSTIC
+                               printf( "bridge_input: failed to duplicate multicast frame, not forwarding\n" );
+#endif
+#if BRIDGE_DEBUG
+                       } else {
+                               if (_if_brige_debug) {
+                                       printf_mbuf(mc, "mc for us: ", "\n");
+                                       printf_mbuf_data(m, 0, 20);
+                                       printf("\n");
+                               }
+#endif /* BRIDGE_DEBUG */
+                       }
+               } else {
+                       /*
+                        * we'll just pass the original, since we don't need to pass it
+                        * up the stack
+                        */
+                       mc = m;
+               }
+               
+               /* Perform the bridge forwarding function with the copy. */
+               if (mc != NULL) {
+#if BRIDGE_DEBUG
+                       if (_if_brige_debug)
+                               printf( "bridge_input: %s%d mcast forwarding \n",
+                                          ifnet_name(sc->sc_if), ifnet_unit(sc->sc_if));
+#endif /* BRIDGE_DEBUG */                      
+                       bridge_forward(sc, mc);
+               }
+               
+               // TBD should have an option for type of bridge
+#if 0
+               /*
+                * Reinject the mbuf as arriving on the bridge so we have a
+                * chance at claiming multicast packets. We can not loop back
+                * here from ether_input as a bridge is never a member of a
+                * bridge.
+                */
+               if (bifp->if_bridge != NULL)
+                       panic("brige_input: brige %p in a bridge %p\n", bifp, bifp->if_bridge);
+               mc = m_dup(m, M_NOWAIT);
+               if (mc != NULL) {
+                       mc->m_pkthdr.rcvif = bifp;
+#if NBPFILTER > 0
+                       if (sc->sc_bpf_input)
+                               bridge_bpf_input(bifp, mc);
+#endif
+               }
+#endif        
+               /* Return the original packet for local processing. */
+               if ( !is_for_us )
+               {
+                       /* we don't free the packet -- bridge_forward already did so */
+                       lck_mtx_unlock(sc->sc_mtx);
+                       
+#if BRIDGE_DEBUG
+                       if (_if_brige_debug)
+                               printf( "bridge_input: %s%d mcast local processing\n",
+                                          ifnet_name(sc->sc_if), ifnet_unit(sc->sc_if));
+#endif
+                       
+                       return EJUSTRETURN;
+               }
+               
+               // mark packet as arriving on the bridge
+               m->m_pkthdr.rcvif = bifp;
+               m->m_pkthdr.header = mbuf_data(m);
+               
+#if NBPFILTER > 0
+               if (sc->sc_bpf_input)
+                       bridge_bpf_input(bifp, m);
+#endif
+               (void) mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN, mbuf_len(m) - ETHER_HDR_LEN);
+               (void) mbuf_pkthdr_adjustlen(m, - ETHER_HDR_LEN);
+               
+               (void) ifnet_stat_increment_in(bifp, 1, mbuf_pkthdr_len(m), 0);
+               
+               lck_mtx_unlock(sc->sc_mtx);
+               
+#if BRIDGE_DEBUG
+               if (_if_brige_debug)
+                       printf( "bridge_input: %s%d mcast for us\n",
+                                  ifnet_name(sc->sc_if), ifnet_unit(sc->sc_if));
+#endif /* BRIDGE_DEBUG */
+               
+               dlil_input_packet_list(bifp, m);
+               
+               return EJUSTRETURN;
+       }
+       
+       if (bif->bif_flags & IFBIF_STP) {
+               switch (bif->bif_state) {
+                       case BSTP_IFSTATE_BLOCKING:
+                       case BSTP_IFSTATE_LISTENING:
+                       case BSTP_IFSTATE_DISABLED:
+                               lck_mtx_unlock(sc->sc_mtx);
+                               
+#if BRIDGE_DEBUG
+                               if (_if_brige_debug)
+                                       printf( "bridge_input: %s%d ucast bridge not learning or forwarding \n",
+                                                  ifnet_name(sc->sc_if), ifnet_unit(sc->sc_if));
+#endif /* BRIDGE_DEBUG */
+                               
+                               m_freem(m);
+                               return EJUSTRETURN;
+               }
+       }
+       
+       /* This code is not needed for Apple's bridge, where the stack attaches directly */
+#if 1 /* TBD should be an option */
+       /*
+        * Unicast.  Make sure it's not for us.
+        */
+       LIST_FOREACH(brm, &sc->sc_iflist, bif_next) {
+               if (ifnet_type(brm->bif_ifp) != IFT_ETHER)
+                       continue;
+               
+               /* It is destined for us. */
+               if (memcmp(ifnet_lladdr(brm->bif_ifp), eh->ether_dhost,
+                                  ETHER_ADDR_LEN) == 0) {
+                       if (brm->bif_flags & IFBIF_LEARNING)
+                               (void) bridge_rtupdate(sc,
+                                                                          eh->ether_shost, ifp, 0, IFBAF_DYNAMIC);
+                       m->m_pkthdr.rcvif = brm->bif_ifp;
+                       m->m_pkthdr.header = mbuf_data(m);
+                       
+                       (void) mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN, mbuf_len(m) - ETHER_HDR_LEN);
+                       (void) mbuf_pkthdr_adjustlen(m, - ETHER_HDR_LEN);
+#if BRIDGE_SUPPORT_GIF
+#if NGIF > 0
+                       if (ifnet_type(ifp) == IFT_GIF) {
+                               m->m_flags |= M_PROTO1;
+                               m->m_pkthdr.rcvif = brm->bif_ifp;
+                               (*brm->bif_ifp->if_input)(brm->bif_ifp, m);
+                               m = NULL;
+                       }
+#endif
+#endif
+                       lck_mtx_unlock(sc->sc_mtx);
+                       
+#if BRIDGE_DEBUG
+                       if (_if_brige_debug)
+                               printf( "bridge_input: %s%d ucast to member %s%d\n",
+                                          ifnet_name(sc->sc_if), ifnet_unit(sc->sc_if),
+                                          ifnet_name(brm->bif_ifp), ifnet_unit(brm->bif_ifp));
+#endif /* BRIDGE_DEBUG */
+                       
+                       dlil_input_packet_list(brm->bif_ifp, m);
+                       
+                       return EJUSTRETURN;
+               }
+               
+               /* We just received a packet that we sent out. */
+               if (memcmp(ifnet_lladdr(brm->bif_ifp), eh->ether_shost,
+                                  ETHER_ADDR_LEN) == 0) {
+                       lck_mtx_unlock(sc->sc_mtx);
+                       
+#if BRIDGE_DEBUG
+                       if (_if_brige_debug)
+                               printf( "bridge_input: %s%d ucast drop packet we sent out\n",
+                                          ifnet_name(sc->sc_if), ifnet_unit(sc->sc_if));
+#endif /* BRIDGE_DEBUG */
+                       
+                       m_freem(m);
+                       return EJUSTRETURN;
+               }
+       }
+#endif
+       
+       /*
+        * If the interface is learning, and the source
+        * address is valid and not multicast, record
+        * the address.
+        */
+       if ((bif->bif_flags & IFBIF_LEARNING) != 0 &&
+               ETHER_IS_MULTICAST(eh->ether_shost) == 0 &&
+               (eh->ether_shost[0] | eh->ether_shost[1] |
+                eh->ether_shost[2] | eh->ether_shost[3] |
+                eh->ether_shost[4] | eh->ether_shost[5]) != 0) {
+                       (void) bridge_rtupdate(sc, eh->ether_shost,
+                                                                  ifp, 0, IFBAF_DYNAMIC);
+               }
+       
+       /* Perform the bridge forwarding function. */
+#if BRIDGE_DEBUG
+       if (_if_brige_debug)
+               printf( "bridge_input: %s%d ucast forwarding\n",
+                          ifnet_name(sc->sc_if), ifnet_unit(sc->sc_if));
+#endif /* BRIDGE_DEBUG */
+       
+       bridge_forward(sc, m);
+       lck_mtx_unlock(sc->sc_mtx);
+       return EJUSTRETURN;
+}
+
+/*
+ * bridge_broadcast:
+ *
+ *     Send a frame to all interfaces that are members of
+ *     the bridge, except for the one on which the packet
+ *     arrived.
+ */
+static void
+bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
+                 struct mbuf *m, __unused int runfilt)
+{
+       struct bridge_iflist *bif;
+       struct mbuf *mc;
+       struct ifnet *dst_if;
+       int used = 0;
+       
+       lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_OWNED);
+       
+       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+               dst_if = bif->bif_ifp;
+               if (dst_if == src_if)
+                       continue;
+        
+               if (bif->bif_flags & IFBIF_STP) {
+                       switch (bif->bif_state) {
+                case BSTP_IFSTATE_BLOCKING:
+                case BSTP_IFSTATE_DISABLED:
+                    continue;
+                       }
+               }
+        
+               if ((bif->bif_flags & IFBIF_DISCOVER) == 0 &&
+                   (m->m_flags & (M_BCAST|M_MCAST)) == 0)
+                       continue;
+        
+               if ((ifnet_flags(dst_if) & IFF_RUNNING) == 0)
+                       continue;
+        
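+               /*
+                * The last member on the list consumes the original mbuf;
+                * every other member gets its own copy.  This saves one
+                * m_copym() per broadcast.
+                */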
+               if (LIST_NEXT(bif, bif_next) == NULL) {
+                       mc = m;
+                       used = 1;
+               } else {
+                       mc = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
+                       if (mc == NULL) {
+                               (void) ifnet_stat_increment_out(sc->sc_if, 0, 0, 1);
+                               continue;
+                       }
+               }
+        
+               bridge_enqueue(sc, dst_if, mc);
+       }
+       if (used == 0)
+               m_freem(m);
+}
+
+/*
+ * bridge_rtupdate:
+ *
+ *     Add a bridge routing entry.
+ */
+static int
+bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst,
+                struct ifnet *dst_if, int setflags, uint8_t flags)
+{
+       struct bridge_rtnode *brt;
+       int error;
+       /* APPLE MODIFICATION <cbz@apple.com> - add support for Proxy STA */
+#if IEEE80211_PROXYSTA
+       struct bridge_iflist *bif;
+       int is_pds; /* are we a proxy sta discovery interface? */
+#endif
+       struct timespec now;
+       
+       /* APPLE MODIFICATION <cbz@apple.com> - add support for Proxy STA - is this an interface 
+     we want to do proxy sta discovery on? */
+#if IEEE80211_PROXYSTA
+       bif = bridge_lookup_member_if(sc, dst_if);
+       if ((bif) && (bif->bif_flags & IFBIF_PROXYSTA_DISCOVER)) {
+               is_pds = 1;
+       }
+       else {
+               is_pds = 0;
+       }
+#endif
+
+       /*
+        * Snapshot the current uptime up front; both the create path and
+        * the refresh path below compute expiry times from it.
+        */
+       nanouptime(&now);
+
+       /*
+        * A route for this destination might already exist.  If so,
+        * update it, otherwise create a new one.
+        */
+       if ((brt = bridge_rtnode_lookup(sc, dst)) == NULL) {
+        /* APPLE MODIFICATION <cbz@apple.com> - add support for Proxy STA */
+#if IEEE80211_PROXYSTA
+               /*
+                * If we are a proxy sta discovery interface, don't count this
+                * address against the bridge cache limit; instead let proxy
+                * stas double that number, so there is still *some* bound.
+                */
+               if (is_pds) {
+                       if (sc->sc_brtcnt >= (sc->sc_brtmax+sc->sc_brtmax_proxysta))
+                               return (ENOSPC);
+               }
+               else
+#endif         
+            if (sc->sc_brtcnt >= sc->sc_brtmax)
+                return (ENOSPC);
+        
+               /*
+                * Allocate a new bridge forwarding node, and
+                * initialize the expiration time and Ethernet
+                * address.
+                */
+               brt = zalloc_noblock(bridge_rtnode_pool);
+               if (brt == NULL)
+                       return (ENOMEM);
+        
+               memset(brt, 0, sizeof(*brt));
+               brt->brt_expire = now.tv_sec + sc->sc_brttimeout;
+               brt->brt_flags = IFBAF_DYNAMIC;
+               memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
+        
+        /* APPLE MODIFICATION <cbz@apple.com> - add support for Proxy STA - is this an interface 
+         we want to do proxy sta discovery on?  If so, post a monitoring event */
+#if IEEE80211_PROXYSTA
+               if (is_pds) {
+                       brt->brt_flags_ext |= IFBAF_EXT_PROXYSTA;
+#if DIAGNOSTIC
+                       printf( "%s: proxysta %02x:%02x:%02x:%02x:%02x:%02x on %s; discovery\n",
+                   __func__, dst[0], dst[1], dst[2], dst[3], dst[4], dst[5], dst_if->if_xname );
+#endif
+                       bridge_proxysta_discover( dst_if, dst );        
+               }       
+#endif
+        
+               if ((error = bridge_rtnode_insert(sc, brt)) != 0) {
+                       zfree(bridge_rtnode_pool, brt);
+                       return (error);
+               }
+       }
+       
+       brt->brt_ifp = dst_if;
+       if (setflags) {
+               brt->brt_flags = flags;
+               brt->brt_expire = (flags & IFBAF_STATIC) ? 0 :
+        now.tv_sec + sc->sc_brttimeout;
+       }
+       
+       /* APPLE MODIFICATION <cbz@apple.com> - add support for Proxy STA -  */
+#if IEEE80211_PROXYSTA
+       if (is_pds) {
+#if VERY_VERY_DIAGNOSTIC
+               printf( "%s: proxysta %02x:%02x:%02x:%02x:%02x:%02x on %s; reset timeout\n",
+               __func__, dst[0], dst[1], dst[2], dst[3], dst[4], dst[5], dst_if->if_xname );
+#endif
+               brt->brt_expire = (flags & IFBAF_STATIC) ? 0 :
+        now.tv_sec + sc->sc_brttimeout;
+       }       
+#endif
+       
+       return (0);
+}
+
+/*
+ * bridge_rtlookup:
+ *
+ *     Look up the destination interface for an address.
+ */
+static struct ifnet *
+bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr)
+{
+       struct bridge_rtnode *brt;
+       
+       if ((brt = bridge_rtnode_lookup(sc, addr)) == NULL)
+               return (NULL);
+       
+       return (brt->brt_ifp);
+}
+
+/*
+ * bridge_rttrim:
+ *
+ *     Trim the routing table so that the number of entries is
+ *     less than or equal to the configured maximum.
+ */
+static void
+bridge_rttrim(struct bridge_softc *sc)
+{
+       struct bridge_rtnode *brt, *nbrt;
+       
+       /* Make sure we actually need to do this. */
+       if (sc->sc_brtcnt <= sc->sc_brtmax)
+               return;
+       
+       /* Force an aging cycle; this might trim enough addresses. */
+       bridge_rtage(sc);
+       if (sc->sc_brtcnt <= sc->sc_brtmax)
+               return;
+       
+       for (brt = LIST_FIRST(&sc->sc_rtlist); brt != NULL; brt = nbrt) {
+               nbrt = LIST_NEXT(brt, brt_list);
+               if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
+                       bridge_rtnode_destroy(sc, brt);
+                       if (sc->sc_brtcnt <= sc->sc_brtmax)
+                               return;
+               }
+       }
+}
+
+/*
+ * bridge_timer:
+ *
+ *     Aging timer for the bridge.
+ */
+static void
+bridge_timer(void *arg)
+{
+       struct bridge_softc *sc = arg;
+       struct timespec ts;
+       
+       lck_mtx_lock(sc->sc_mtx);
+       
+       bridge_rtage(sc);
+       
+       lck_mtx_unlock(sc->sc_mtx);
+       
+       if (ifnet_flags(sc->sc_if) & IFF_RUNNING) {
+               ts.tv_sec = bridge_rtable_prune_period;
+               ts.tv_nsec = 0;
+               bsd_timeout(bridge_timer, sc, &ts);
+       }
+}
+
+/*
+ * bridge_rtage:
+ *
+ *     Perform an aging cycle.
+ */
+static void
+bridge_rtage(struct bridge_softc *sc)
+{
+       struct bridge_rtnode *brt, *nbrt;
+       struct timespec now;
+       
+       nanouptime(&now);
+       
+       for (brt = LIST_FIRST(&sc->sc_rtlist); brt != NULL; brt = nbrt) {
+               nbrt = LIST_NEXT(brt, brt_list);
+               if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
+                       if ((unsigned long)now.tv_sec >= brt->brt_expire)
+                               bridge_rtnode_destroy(sc, brt);
+               }
+       }
+}
+
+/*
+ * bridge_rtflush:
+ *
+ *     Remove all dynamic addresses from the bridge.
+ */
+static void
+bridge_rtflush(struct bridge_softc *sc, int full)
+{
+       struct bridge_rtnode *brt, *nbrt;
+       
+       for (brt = LIST_FIRST(&sc->sc_rtlist); brt != NULL; brt = nbrt) {
+               nbrt = LIST_NEXT(brt, brt_list);
+               if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
+                       bridge_rtnode_destroy(sc, brt);
+       }
+}
+
+/* APPLE MODIFICATION <cbz@apple.com> - add support for Proxy STA */
+#if IEEE80211_PROXYSTA
+/*
+ * bridge_rtdiscovery:
+ *
+ *     Scan the routing table, flag entries learned on proxy sta
+ *     discovery interfaces, and initiate discovery for entries that
+ *     do not yet have a proxy sta interface.
+ */
+static void
+bridge_rtdiscovery(struct bridge_softc *sc)
+{
+       struct bridge_rtnode *brt, *nbrt;
+       struct bridge_iflist *bif;
+       
+       for (brt = LIST_FIRST(&sc->sc_rtlist); brt != NULL; brt = nbrt) {
+               nbrt = LIST_NEXT(brt, brt_list);
+               bif = bridge_lookup_member_if(sc, brt->brt_ifp);
+               if ((bif) && (bif->bif_flags & IFBIF_PROXYSTA_DISCOVER) && 
+                       ((brt->brt_flags_ext & IFBAF_EXT_PROXYSTA) == 0)) {
+#if DIAGNOSTIC
+                       printf( "%s: proxysta %02x:%02x:%02x:%02x:%02x:%02x on %s; found before IFBIF_PROXYSTA_DISCOVER\n",
+                                  __func__, brt->brt_addr[0], brt->brt_addr[1], brt->brt_addr[2], brt->brt_addr[3], 
+                                  brt->brt_addr[4], brt->brt_addr[5], brt->brt_ifp->if_xname );
+#endif
+                       brt->brt_flags_ext |= IFBAF_EXT_PROXYSTA;
+               }
+               
+               if (brt->brt_ifp_proxysta == NULL) {
+#if DIAGNOSTIC
+                       printf( "%s: proxysta %02x:%02x:%02x:%02x:%02x:%02x on %s; discovery\n",
+                                  __func__, brt->brt_addr[0], brt->brt_addr[1], brt->brt_addr[2], brt->brt_addr[3], 
+                                  brt->brt_addr[4], brt->brt_addr[5], brt->brt_ifp->if_xname );
+#endif
+                       bridge_proxysta_discover( brt->brt_ifp, brt->brt_addr );        
+               }
+       }
+}
+
+/*
+ * bridge_rtpurge:
+ *
+ *     Remove all addresses learned on a specific interface from the bridge.
+ */
+static void
+bridge_rtpurge(struct bridge_softc *sc, struct ifnet *ifs)
+{
+       struct bridge_rtnode *brt, *nbrt;
+       
+       for (brt = LIST_FIRST(&sc->sc_rtlist); brt != NULL; brt = nbrt) {
+               nbrt = LIST_NEXT(brt, brt_list);
+               if (brt->brt_ifp == ifs) {
+#if DIAGNOSTIC
+                       printf( "%s: purge %s [%02x:%02x:%02x:%02x:%02x:%02x] discovered on %s\n",
+                   __func__, brt->brt_ifp_proxysta ? brt->brt_ifp_proxysta->if_xname : brt->brt_ifp->if_xname, 
+                   brt->brt_addr[0], brt->brt_addr[1], brt->brt_addr[2], 
+                   brt->brt_addr[3], brt->brt_addr[4], brt->brt_addr[5], brt->brt_ifp->if_xname );
+#endif
+                       bridge_rtnode_destroy(sc, brt);
+               }
+       }
+}
+#endif
+
+/*
+ * bridge_rtdaddr:
+ *
+ *     Remove an address from the table.
+ */
+static int
+bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr)
+{
+       struct bridge_rtnode *brt;
+       
+       if ((brt = bridge_rtnode_lookup(sc, addr)) == NULL)
+               return (ENOENT);
+       
+       bridge_rtnode_destroy(sc, brt);
+       return (0);
+}
+
+/*
+ * bridge_rtdelete:
+ *
+ *     Delete routes to a specific member interface.
+ */
+__private_extern__ void
+bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full)
+{
+       struct bridge_rtnode *brt, *nbrt;
+       
+       for (brt = LIST_FIRST(&sc->sc_rtlist); brt != NULL; brt = nbrt) {
+               nbrt = LIST_NEXT(brt, brt_list);
+               if (brt->brt_ifp == ifp && (full ||
+                                    (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC))
+                       bridge_rtnode_destroy(sc, brt);
+       }
+}
+
+/*
+ * bridge_rtable_init:
+ *
+ *     Initialize the route table for this bridge.
+ */
+static int
+bridge_rtable_init(struct bridge_softc *sc)
+{
+       int i;
+       
+       sc->sc_rthash = _MALLOC(sizeof(*sc->sc_rthash) * BRIDGE_RTHASH_SIZE,
+                            M_DEVBUF, M_WAITOK);
+       if (sc->sc_rthash == NULL)
+               return (ENOMEM);
+       
+       for (i = 0; i < BRIDGE_RTHASH_SIZE; i++)
+               LIST_INIT(&sc->sc_rthash[i]);
+       
+       sc->sc_rthash_key = random();
+       
+       LIST_INIT(&sc->sc_rtlist);
+       
+       return (0);
+}
+
+/*
+ * bridge_rtable_fini:
+ *
+ *     Deconstruct the route table for this bridge.
+ */
+static void
+bridge_rtable_fini(struct bridge_softc *sc)
+{
+       
+       _FREE(sc->sc_rthash, M_DEVBUF);
+}
+
+/*
+ * The following hash function is adapted from "Hash Functions" by Bob Jenkins
+ * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
+ */
+#define        mix(a, b, c)                                                    \
+do {                                                                   \
+       a -= b; a -= c; a ^= (c >> 13);                                 \
+       b -= c; b -= a; b ^= (a << 8);                                  \
+       c -= a; c -= b; c ^= (b >> 13);                                 \
+       a -= b; a -= c; a ^= (c >> 12);                                 \
+       b -= c; b -= a; b ^= (a << 16);                                 \
+       c -= a; c -= b; c ^= (b >> 5);                                  \
+       a -= b; a -= c; a ^= (c >> 3);                                  \
+       b -= c; b -= a; b ^= (a << 10);                                 \
+       c -= a; c -= b; c ^= (b >> 15);                                 \
+} while (/*CONSTCOND*/0)
+
+static uint32_t
+bridge_rthash(__unused struct bridge_softc *sc, const uint8_t *addr)
+{
+       /* APPLE MODIFICATION - Wasabi performance improvement - simplify the hash algorithm */
+#if 0
+       uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
+       
+       b += addr[5] << 8;
+       b += addr[4];
+       a += addr[3] << 24;
+       a += addr[2] << 16;
+       a += addr[1] << 8;
+       a += addr[0];
+       
+       mix(a, b, c);
+       
+       return (c & BRIDGE_RTHASH_MASK);
+#else
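+       /*
+        * Hash on the low octet of the MAC address alone.  The low octets
+        * of real-world addresses vary the most, so this is cheap and
+        * usually spreads entries well, at the cost of a degenerate chain
+        * when many addresses share a low octet.
+        */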
+       return addr[5];
+#endif
+}
+
+#undef mix
+
+/*
+ * bridge_rtnode_lookup:
+ *
+ *     Look up a bridge route node for the specified destination.
+ */
+static struct bridge_rtnode *
+bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr)
+{
+       struct bridge_rtnode *brt;
+       uint32_t hash;
+       int dir;
+       
+       hash = bridge_rthash(sc, addr);
+       LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) {
+               dir = memcmp(addr, brt->brt_addr, ETHER_ADDR_LEN);
+               if (dir == 0)
+                       return (brt);
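+               /*
+                * Chains are kept sorted in descending memcmp() order by
+                * bridge_rtnode_insert(), so once we pass the slot where
+                * the address would live we can stop searching.
+                */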
+               if (dir > 0)
+                       return (NULL);
+       }
+       
+       return (NULL);
+}
+
+/*
+ * bridge_rtnode_insert:
+ *
+ *     Insert the specified bridge node into the route table.  We
+ *     assume the entry is not already in the table.
+ */
+static int
+bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
+{
+       struct bridge_rtnode *lbrt;
+       uint32_t hash;
+       int dir;
+       
+       hash = bridge_rthash(sc, brt->brt_addr);
+       
+       lbrt = LIST_FIRST(&sc->sc_rthash[hash]);
+       if (lbrt == NULL) {
+               LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash);
+               goto out;
+       }
+       
+       do {
+               dir = memcmp(brt->brt_addr, lbrt->brt_addr, ETHER_ADDR_LEN);
+               if (dir == 0)
+                       return (EEXIST);
+               if (dir > 0) {
+                       LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
+                       goto out;
+               }
+               if (LIST_NEXT(lbrt, brt_hash) == NULL) {
+                       LIST_INSERT_AFTER(lbrt, brt, brt_hash);
+                       goto out;
+               }
+               lbrt = LIST_NEXT(lbrt, brt_hash);
+       } while (lbrt != NULL);
+       
+#ifdef DIAGNOSTIC
+       panic("bridge_rtnode_insert: impossible");
+#endif
+       
+out:
+       LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list);
+       sc->sc_brtcnt++;
+       
+       return (0);
+}
+
+/*
+ * bridge_rtnode_destroy:
+ *
+ *     Destroy a bridge rtnode.
+ */
+static void
+bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
+{
+       lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_OWNED);
+       
+       /* APPLE MODIFICATION <cbz@apple.com> - add support for Proxy STA */
+#if IEEE80211_PROXYSTA
+       if (brt->brt_flags_ext & IFBAF_EXT_PROXYSTA) {
+#if DIAGNOSTIC
+               printf( "%s: proxysta %02x:%02x:%02x:%02x:%02x:%02x %s from %s; idle timeout\n",
+               __func__, brt->brt_addr[0], brt->brt_addr[1], brt->brt_addr[2], 
+               brt->brt_addr[3], brt->brt_addr[4], brt->brt_addr[5], 
+               brt->brt_ifp_proxysta ? brt->brt_ifp_proxysta->if_xname : "unknown",
+               brt->brt_ifp->if_xname );
+#endif
+               bridge_proxysta_idle_timeout( brt->brt_ifp, brt->brt_addr );    
+       }
+#endif
+       
+       LIST_REMOVE(brt, brt_hash);
+       
+       LIST_REMOVE(brt, brt_list);
+       sc->sc_brtcnt--;
+       zfree(bridge_rtnode_pool, brt);
+}
+
+static errno_t
+bridge_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func bpf_callback)
+{
+       struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
+       
+       //printf("bridge_set_bpf_tap ifp %p mode %d\n", ifp, mode);
+       
+       /* TBD locking */
+       if (sc == NULL || (sc->sc_flags & SCF_DETACHING)) {
+               return ENODEV;
+       }
+       
+       switch (mode) {
+               case BPF_TAP_DISABLE:
+                       sc->sc_bpf_input = sc->sc_bpf_output = NULL;
+                       break;
+                       
+               case BPF_TAP_INPUT:
+                       sc->sc_bpf_input = bpf_callback;
+                       break;
+                       
+               case BPF_TAP_OUTPUT:
+                       sc->sc_bpf_output = bpf_callback;
+                       break;
+                       
+               case BPF_TAP_INPUT_OUTPUT:
+                       sc->sc_bpf_input = sc->sc_bpf_output = bpf_callback;
+                       break;
+                       
+               default:
+                       break;
+       }
+       
+       return 0;
+}
+
+static void
+bridge_detach(ifnet_t ifp)
+{
+       struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
+       
+       /* Tear down the routing table. */
+       bridge_rtable_fini(sc);
+       
+       lck_rw_lock_exclusive(bridge_list_lock);
+       LIST_REMOVE(sc, sc_list);
+       lck_rw_done(bridge_list_lock);
+       
+       ifnet_release(ifp);
+       
+       lck_mtx_free(sc->sc_mtx, bridge_lock_grp);
+       
+       _FREE(sc, M_DEVBUF);
+       return;
+}
+
+__private_extern__ errno_t bridge_bpf_input(ifnet_t ifp, struct mbuf *m)
+{
+       struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
+       
+       if (sc->sc_bpf_input) {
+               if (mbuf_pkthdr_rcvif(m) != ifp)
+                       printf("bridge_bpf_input rcvif: %p != ifp %p\n", mbuf_pkthdr_rcvif(m), ifp);
+               (*sc->sc_bpf_input)(ifp, m);
+       }
+       return 0;
+}
+
+__private_extern__ errno_t bridge_bpf_output(ifnet_t ifp, struct mbuf *m)
+{
+       struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
+       
+       if (sc->sc_bpf_output) {
+               (*sc->sc_bpf_output)(ifp, m);
+       }
+       return 0;
+}
+
diff --git a/bsd/net/if_bridgevar.h b/bsd/net/if_bridgevar.h
new file mode 100644 (file)
index 0000000..6b47c92
--- /dev/null
@@ -0,0 +1,484 @@
+/*
+ * Copyright (c) 2004-2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*     $apfw: if_bridgevar,v 1.7 2008/10/24 02:34:06 cbzimmer Exp $ */
+/*     $NetBSD: if_bridgevar.h,v 1.8 2005/12/10 23:21:38 elad Exp $    */
+
+/*
+ * Copyright 2001 Wasabi Systems, Inc.
+ * All rights reserved.
+ *
+ * Written by Jason R. Thorpe for Wasabi Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed for the NetBSD Project by
+ *     Wasabi Systems, Inc.
+ * 4. The name of Wasabi Systems, Inc. may not be used to endorse
+ *    or promote products derived from this software without specific prior
+ *    written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by Jason L. Wright
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * OpenBSD: if_bridge.h,v 1.14 2001/03/22 03:48:29 jason Exp
+ */
+
+/*
+ * Data structure and control definitions for bridge interfaces.
+ */
+
+#ifndef _NET_IF_BRIDGEVAR_H_
+#define _NET_IF_BRIDGEVAR_H_
+
+#ifdef PRIVATE
+
+#include <sys/queue.h>
+
+#include <net/if.h>
+#include <net/ethernet.h>
+
+/*
+ * Commands used in the SIOCSDRVSPEC ioctl.  Note that the lookup of the
+ * bridge interface itself is keyed off the ifdrv structure; see the
+ * usage sketch after the command list below.
+ */
+#define        BRDGADD                 0       /* add bridge member (ifbreq) */
+#define        BRDGDEL                 1       /* delete bridge member (ifbreq) */
+#define        BRDGGIFFLGS             2       /* get member if flags (ifbreq) */
+#define        BRDGSIFFLGS             3       /* set member if flags (ifbreq) */
+#define        BRDGSCACHE              4       /* set cache size (ifbrparam) */
+#define        BRDGGCACHE              5       /* get cache size (ifbrparam) */
+#define        BRDGGIFS                6       /* get member list (ifbifconf) */
+#define        BRDGRTS                 7       /* get address list (ifbaconf) */
+#define        BRDGSADDR               8       /* set static address (ifbareq) */
+#define        BRDGSTO                 9       /* set cache timeout (ifbrparam) */
+#define        BRDGGTO                 10      /* get cache timeout (ifbrparam) */
+#define        BRDGDADDR               11      /* delete address (ifbareq) */
+#define        BRDGFLUSH               12      /* flush address cache (ifbreq) */
+
+#define        BRDGGPRI                13      /* get priority (ifbrparam) */
+#define        BRDGSPRI                14      /* set priority (ifbrparam) */
+#define        BRDGGHT                 15      /* get hello time (ifbrparam) */
+#define        BRDGSHT                 16      /* set hello time (ifbrparam) */
+#define        BRDGGFD                 17      /* get forward delay (ifbrparam) */
+#define        BRDGSFD                 18      /* set forward delay (ifbrparam) */
+#define        BRDGGMA                 19      /* get max age (ifbrparam) */
+#define        BRDGSMA                 20      /* set max age (ifbrparam) */
+#define        BRDGSIFPRIO             21      /* set if priority (ifbreq) */
+#define BRDGSIFCOST            22      /* set if path cost (ifbreq) */
+#define BRDGGFILT              23      /* get filter flags (ifbrparam) */
+#define BRDGSFILT              24      /* set filter flags (ifbrparam) */
+#define        BRDGPURGE               25      /* purge address cache for a particular interface (ifbreq) */
+
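+/*
+ * A minimal userland sketch of issuing one of these commands
+ * (illustrative only; assumes struct ifdrv from <net/if.h> and
+ * SIOCSDRVSPEC from <sys/sockio.h> are visible to the caller):
+ *
+ *     struct ifbreq req;
+ *     struct ifdrv ifd;
+ *     int s = socket(AF_INET, SOCK_DGRAM, 0);
+ *
+ *     memset(&req, 0, sizeof(req));
+ *     strlcpy(req.ifbr_ifsname, "en1", sizeof(req.ifbr_ifsname));
+ *     memset(&ifd, 0, sizeof(ifd));
+ *     strlcpy(ifd.ifd_name, "bridge0", sizeof(ifd.ifd_name));
+ *     ifd.ifd_cmd = BRDGADD;
+ *     ifd.ifd_len = sizeof(req);
+ *     ifd.ifd_data = &req;
+ *     if (ioctl(s, SIOCSDRVSPEC, &ifd) == -1)
+ *             err(1, "BRDGADD");
+ */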
+/*
+ * Generic bridge control request.
+ */
+#pragma pack(4)
+
+struct ifbreq {
+       char            ifbr_ifsname[IFNAMSIZ]; /* member if name */
+       uint32_t        ifbr_ifsflags;          /* member if flags */
+       uint16_t        ifbr_portno;            /* member if port number */
+       uint8_t         ifbr_state;             /* member if STP state */
+       uint8_t         ifbr_priority;          /* member if STP priority */
+       uint8_t         ifbr_path_cost;         /* member if STP cost */
+};
+
+#pragma pack()
+
+/* BRDGGIFFLGS, BRDGSIFFLGS */
+#define        IFBIF_LEARNING          0x01    /* if can learn */
+#define        IFBIF_DISCOVER          0x02    /* if sends packets w/ unknown dest. */
+#define        IFBIF_STP               0x04    /* if participates in spanning tree */
+/* APPLE MODIFICATION <cbz@apple.com>
+ add the following bits for ProxySTA:
+ IFBIF_PROXYSTA, IFBIF_PROXYSTA_DISCOVER
+ add the following bits for Guest Network      
+ IFBIF_NO_FORWARDING
+ */
+#define        IFBIF_PROXYSTA                          0x08    /* if interface is a proxy sta */
+#define        IFBIF_PROXYSTA_DISCOVER         0x10    /* if interface is used to discover proxy sta candidates */
+#define        IFBIF_NO_FORWARDING                 0x20        /* if interface cannot forward traffic from one interface to the next */
+
+/* APPLE MODIFICATION <cbz@apple.com> 
+ add the following bits for ProxySTA:
+ PROXYSTA, PROXYSTA_DISCOVER
+ add the following bits for Guest Network      
+ NO_FORWARDING
+ this was...   
+ #define       IFBIFBITS       "\020\1LEARNING\2DISCOVER\3STP"
+ */
+#define        IFBIFBITS       "\020\1LEARNING\2DISCOVER\3STP\4PROXYSTA\5PROXYSTA_DISCOVER\6NO_FORWARDING"
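+/*
+ * IFBIFBITS is a BSD "%b"-style bitmask decode string: the leading
+ * \020 (octal 16) selects hexadecimal output, and each subsequent
+ * \<n>NAME pair names bit n (counting from 1) of the flags word.
+ */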
+
+/* BRDGFLUSH */
+#define        IFBF_FLUSHDYN           0x00    /* flush learned addresses only */
+#define        IFBF_FLUSHALL           0x01    /* flush all addresses */
+
+/* BRDGSFILT */
+#define IFBF_FILT_USEIPF       0x00000001      /* run pfil hooks on the bridge
+                                                  interface */
+#define IFBF_FILT_MEMBER       0x00000002      /* run pfil hooks on the member
+                                                  interfaces */
+#define IFBF_FILT_ONLYIP       0x00000004      /* only pass IP[46] packets when
+                                                  pfil is enabled */
+#define IFBF_FILT_MASK         0x00000007      /* mask of valid values */
+
+
+/* APPLE MODIFICATION <jhw@apple.com>: Default is to pass non-IP packets. */
+#define        IFBF_FILT_DEFAULT       ( IFBF_FILT_USEIPF | IFBF_FILT_MEMBER )
+#if 0
+#define        IFBF_FILT_DEFAULT       (IFBF_FILT_USEIPF | \
+                                        IFBF_FILT_MEMBER | \
+                                        IFBF_FILT_ONLYIP)
+#endif
+
+/* STP port states */
+#define        BSTP_IFSTATE_DISABLED   0
+#define        BSTP_IFSTATE_LISTENING  1
+#define        BSTP_IFSTATE_LEARNING   2
+#define        BSTP_IFSTATE_FORWARDING 3
+#define        BSTP_IFSTATE_BLOCKING   4
+
+/*
+ * Interface list structure.
+ */
+
+#pragma pack(4)
+
+struct ifbifconf {
+       uint32_t        ifbic_len;      /* buffer size */
+       union {
+               caddr_t ifbicu_buf;
+               struct ifbreq *ifbicu_req;
+       } ifbic_ifbicu;
+#define        ifbic_buf       ifbic_ifbicu.ifbicu_buf
+#define        ifbic_req       ifbic_ifbicu.ifbicu_req
+};
+
+#ifdef KERNEL_PRIVATE
+struct ifbifconf32 {
+       uint32_t        ifbic_len;      /* buffer size */
+       union {
+               user32_addr_t   ifbicu_buf;
+               user32_addr_t   ifbicu_req;
+       } ifbic_ifbicu;
+};
+
+struct ifbifconf64 {
+       uint32_t        ifbic_len;      /* buffer size */
+       union {
+               user64_addr_t   ifbicu_buf;
+               user64_addr_t   ifbicu_req;
+       } ifbic_ifbicu;
+};
+#endif /* KERNEL_PRIVATE */
+
+#pragma pack()
+
+/*
+ * Bridge address request.
+ */
+
+#pragma pack(4)
+
+struct ifbareq {
+       char            ifba_ifsname[IFNAMSIZ]; /* member if name */
+       unsigned long   ifba_expire;            /* address expire time */
+       uint8_t         ifba_flags;             /* address flags */
+       uint8_t         ifba_dst[ETHER_ADDR_LEN];/* destination address */
+};
+
+#ifdef KERNEL_PRIVATE
+struct ifbareq32 {
+       char            ifba_ifsname[IFNAMSIZ]; /* member if name */
+       uint32_t        ifba_expire;            /* address expire time */
+       uint8_t         ifba_flags;             /* address flags */
+       uint8_t         ifba_dst[ETHER_ADDR_LEN];/* destination address */
+};
+
+struct ifbareq64 {
+       char            ifba_ifsname[IFNAMSIZ]; /* member if name */
+       uint64_t        ifba_expire;            /* address expire time */
+       uint8_t         ifba_flags;             /* address flags */
+       uint8_t         ifba_dst[ETHER_ADDR_LEN];/* destination address */
+};
+#endif /* KERNEL_PRIVATE */
+
+#pragma pack()
+
+#define        IFBAF_TYPEMASK  0x03    /* address type mask */
+#define        IFBAF_DYNAMIC   0x00    /* dynamically learned address */
+#define        IFBAF_STATIC    0x01    /* static address */
+
+#define        IFBAFBITS       "\020\1STATIC"
+
+/*
+ * Address list structure.
+ */
+
+#pragma pack(4)
+
+struct ifbaconf {
+       uint32_t        ifbac_len;      /* buffer size */
+       union {
+               caddr_t ifbacu_buf;
+               struct ifbareq *ifbacu_req;
+       } ifbac_ifbacu;
+#define        ifbac_buf       ifbac_ifbacu.ifbacu_buf
+#define        ifbac_req       ifbac_ifbacu.ifbacu_req
+};
+
+#ifdef KERNEL_PRIVATE
+struct ifbaconf32 {
+       uint32_t        ifbac_len;      /* buffer size */
+       union {
+               user32_addr_t   ifbacu_buf;
+               user32_addr_t   ifbacu_req;
+       } ifbac_ifbacu;
+};
+
+struct ifbaconf64 {
+       uint32_t        ifbac_len;      /* buffer size */
+       union {
+               user64_addr_t   ifbacu_buf;
+               user64_addr_t   ifbacu_req;
+       } ifbac_ifbacu;
+};
+#endif /* KERNEL_PRIVATE */
+
+#pragma pack()
+
+/*
+ * Bridge parameter structure.
+ */
+
+#pragma pack(4)
+
+struct ifbrparam {
+       union {
+               uint32_t ifbrpu_int32;
+               uint16_t ifbrpu_int16;
+               uint8_t ifbrpu_int8;
+       } ifbrp_ifbrpu;
+};
+
+#pragma pack()
+
+#define        ifbrp_csize     ifbrp_ifbrpu.ifbrpu_int32       /* cache size */
+#define        ifbrp_ctime     ifbrp_ifbrpu.ifbrpu_int32       /* cache time (sec) */
+#define        ifbrp_prio      ifbrp_ifbrpu.ifbrpu_int16       /* bridge priority */
+#define        ifbrp_hellotime ifbrp_ifbrpu.ifbrpu_int8        /* hello time (sec) */
+#define        ifbrp_fwddelay  ifbrp_ifbrpu.ifbrpu_int8        /* fwd time (sec) */
+#define        ifbrp_maxage    ifbrp_ifbrpu.ifbrpu_int8        /* max age (sec) */
+#define        ifbrp_filter    ifbrp_ifbrpu.ifbrpu_int32       /* filtering flags */
+
+#ifdef KERNEL
+/*
+ * Timekeeping structure used in spanning tree code.
+ */
+struct bridge_timer {
+       uint16_t        active;
+       uint16_t        value;
+};
+
+struct bstp_config_unit {
+       uint64_t        cu_rootid;
+       uint64_t        cu_bridge_id;
+       uint32_t        cu_root_path_cost;
+       uint16_t        cu_message_age;
+       uint16_t        cu_max_age;
+       uint16_t        cu_hello_time;
+       uint16_t        cu_forward_delay;
+       uint16_t        cu_port_id;
+       uint8_t         cu_message_type;
+       uint8_t         cu_topology_change_acknowledgment;
+       uint8_t         cu_topology_change;
+};
+
+struct bstp_tcn_unit {
+       uint8_t         tu_message_type;
+};
+
+struct bridge_softc;
+
+/*
+ * Bridge interface list entry.
+ * (VL) bridge_ifmember would be a better name, more descriptive
+ */
+struct bridge_iflist {
+       LIST_ENTRY(bridge_iflist) bif_next;
+       uint64_t                bif_designated_root;
+       uint64_t                bif_designated_bridge;
+       uint32_t                bif_path_cost;
+       uint32_t                bif_designated_cost;
+       struct bridge_timer     bif_hold_timer;
+       struct bridge_timer     bif_message_age_timer;
+       struct bridge_timer     bif_forward_delay_timer;
+       uint16_t                bif_port_id;
+       uint16_t                bif_designated_port;
+       struct bstp_config_unit bif_config_bpdu;
+       uint8_t                 bif_state;
+       uint8_t                 bif_topology_change_acknowledge;
+       uint8_t                 bif_config_pending;
+       uint8_t                 bif_change_detection_enabled;
+       uint8_t                 bif_priority;
+       struct ifnet    *bif_ifp;       /* member if */
+       uint32_t                bif_flags;      /* member if flags */
+       int                             bif_mutecap;    /* member muted caps */
+       interface_filter_t      bif_iff_ref;
+       struct bridge_softc *bif_sc;
+};
+
+/*
+ * Bridge route node.
+ */
+struct bridge_rtnode {
+       LIST_ENTRY(bridge_rtnode) brt_hash;     /* hash table linkage */
+       LIST_ENTRY(bridge_rtnode) brt_list;     /* list linkage */
+       struct ifnet            *brt_ifp;       /* destination if */
+       unsigned long           brt_expire;     /* expiration time */
+       uint8_t                 brt_flags;      /* address flags */
+       uint8_t                 brt_addr[ETHER_ADDR_LEN];
+       /* APPLE MODIFICATION <cbz@apple.com> - add the following elements:
+     brt_flags_ext, brt_ifp_proxysta */
+#define IFBAF_EXT_PROXYSTA  0x01
+       uint8_t                 brt_flags_ext;  /* extended flags */
+       struct ifnet    *brt_ifp_proxysta;      /* proxy sta if */
+};
+
+
+/*
+ * Software state for each bridge.
+ */
+struct bridge_softc {
+       LIST_ENTRY(bridge_softc) sc_list;
+       struct ifnet    *sc_if;
+       uint64_t                sc_designated_root;
+       uint64_t                sc_bridge_id;
+       struct bridge_iflist    *sc_root_port;
+       uint32_t                sc_root_path_cost;
+       uint16_t                sc_max_age;
+       uint16_t                sc_hello_time;
+       uint16_t                sc_forward_delay;
+       uint16_t                sc_bridge_max_age;
+       uint16_t                sc_bridge_hello_time;
+       uint16_t                sc_bridge_forward_delay;
+       uint16_t                sc_topology_change_time;
+       uint16_t                sc_hold_time;
+       uint16_t                sc_bridge_priority;
+       uint8_t                 sc_topology_change_detected;
+       uint8_t                 sc_topology_change;
+       struct bridge_timer     sc_hello_timer;
+       struct bridge_timer     sc_topology_change_timer;
+       struct bridge_timer     sc_tcn_timer;
+       uint32_t                sc_brtmax;      /* max # of addresses */
+       uint32_t                sc_brtcnt;      /* cur. # of addresses */
+       /* APPLE MODIFICATION <cbz@apple.com> - add the following elements:
+     sc_brtmax_proxysta */
+       uint32_t                sc_brtmax_proxysta;     /* max # of proxy sta addresses */
+       uint32_t                sc_brttimeout;  /* rt timeout in seconds */
+       LIST_HEAD(, bridge_iflist) sc_iflist;   /* member interface list */
+       LIST_HEAD(, bridge_rtnode) *sc_rthash;  /* our forwarding table */
+       LIST_HEAD(, bridge_rtnode) sc_rtlist;   /* list version of above */
+       uint32_t                sc_rthash_key;  /* key for hash */
+       uint32_t                sc_filter_flags; /* ipf and flags */
+    
+       //(VL)
+       char                    sc_if_xname[IFNAMSIZ];
+    bpf_packet_func    sc_bpf_input;
+    bpf_packet_func    sc_bpf_output;
+    u_int32_t          sc_flags;
+    lck_mtx_t          *sc_mtx;
+};
+
+#define SCF_DETACHING 0x1
+
+extern const uint8_t bstp_etheraddr[];
+
+int    bridgeattach(int);
+void   bridge_enqueue(struct bridge_softc *, struct ifnet *, struct mbuf *);
+void   bridge_rtdelete(struct bridge_softc *, struct ifnet *, int);
+
+void   bstp_initialization(struct bridge_softc *);
+void   bstp_stop(struct bridge_softc *);
+struct mbuf *bstp_input(struct bridge_softc *, struct ifnet *, struct mbuf *);
+
+
+#endif /* KERNEL */
+#endif /* PRIVATE */
+#endif /* !_NET_IF_BRIDGEVAR_H_ */
+
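
A minimal user-space sketch of driving the commands above, assuming the FreeBSD-style struct ifdrv / SIOCGDRVSPEC plumbing that if_bridge traditionally pairs with this header; the fixed-size buffer and minimal error handling are for brevity, and this is an illustration rather than part of the commit:

#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <net/if.h>
#include <net/if_bridgevar.h>   /* the private header added above */
#include <stdio.h>
#include <string.h>

static void
print_bridge_members(int s, const char *bridge) /* s: AF_INET dgram socket */
{
        struct ifbreq reqs[32];         /* fixed-size buffer for brevity */
        struct ifbifconf bifc;
        struct ifdrv ifd;
        unsigned int i;

        memset(&bifc, 0, sizeof (bifc));
        bifc.ifbic_len = sizeof (reqs);
        bifc.ifbic_req = reqs;

        memset(&ifd, 0, sizeof (ifd));
        strlcpy(ifd.ifd_name, bridge, sizeof (ifd.ifd_name));
        ifd.ifd_cmd = BRDGGIFS;         /* get member list (ifbifconf) */
        ifd.ifd_len = sizeof (bifc);
        ifd.ifd_data = &bifc;

        if (ioctl(s, SIOCGDRVSPEC, &ifd) < 0)
                return;
        /* on success the kernel rewrites ifbic_len to the bytes filled in */
        for (i = 0; i < bifc.ifbic_len / sizeof (reqs[0]); i++)
                printf("%s: flags 0x%x, STP state %u\n",
                    reqs[i].ifbr_ifsname, reqs[i].ifbr_ifsflags,
                    reqs[i].ifbr_state);
}
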
index 8d82c530d45bbd33ce4eff5c8229f1b0426cd048..e407e009fa8fe6e3d498bc026affbd3cef26aa88 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000, 2009 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 extern struct ifqueue pkintrq;
 #endif
 
-#if BRIDGE
-#include <net/bridge.h>
-#endif
-
 /* #include "vlan.h" */
 #if NVLAN > 0
 #include <net/if_vlan_var.h>
index 7b0d446e2d92c2ece31bfa5b4230fee59f9fa863..dade70621211095fa54b709140b9358590422a85 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000,2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *     This product includes software developed by the University of
- *     California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
@@ -57,7 +53,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- *      @(#)if_llc.h   8.1 (Berkeley) 6/10/93
+ *     @(#)if_llc.h    8.1 (Berkeley) 6/10/93
  */
 
 #ifndef _NET_IF_LLC_H_
  */
 
 struct llc {
-       u_char  llc_dsap;
-       u_char  llc_ssap;
+       u_int8_t llc_dsap;
+       u_int8_t llc_ssap;
        union {
            struct {
-               u_char control;
-               u_char format_id;
-               u_char class_id;
-               u_char window_x2;
+               u_int8_t control;
+               u_int8_t format_id;
+               u_int8_t class_id;
+               u_int8_t window_x2;
            } type_u;
            struct {
-               u_char num_snd_x2;
-               u_char num_rcv_x2;
+               u_int8_t num_snd_x2;
+               u_int8_t num_rcv_x2;
            } type_i;
            struct {
-               u_char control;
-               u_char num_rcv_x2;
+               u_int8_t control;
+               u_int8_t num_rcv_x2;
            } type_s;
            struct {
-               u_char control;
-               struct frmrinfo {
-                       u_char rej_pdu_0;
-                       u_char rej_pdu_1;
-                       u_char frmr_control;
-                       u_char frmr_control_ext;
-                       u_char frmr_cause;
-               } frmrinfo;
+               u_int8_t control;
+               /*
+                * We cannot put the following fields in a structure because
+                * the structure rounding might cause padding.
+                */
+               u_int8_t frmr_rej_pdu0;
+               u_int8_t frmr_rej_pdu1;
+               u_int8_t frmr_control;
+               u_int8_t frmr_control_ext;
+               u_int8_t frmr_cause;
            } type_frmr;
            struct {
-               u_char control;
-               u_char org_code[3];
-               u_short ether_type;
-           } type_snap;
+               u_int8_t  control;
+               u_int8_t  org_code[3];
+               u_int16_t ether_type;
+           } type_snap __attribute__((__packed__));
            struct {
-               u_char control;
-               u_char control_ext;
+               u_int8_t control;
+               u_int8_t control_ext;
            } type_raw;
        } llc_un;
-};
-#define llc_control            llc_un.type_u.control
-#define        llc_control_ext        llc_un.type_raw.control_ext
-#define llc_fid                llc_un.type_u.format_id
-#define llc_class              llc_un.type_u.class_id
-#define llc_window             llc_un.type_u.window_x2
-#define llc_frmrinfo           llc_un.type_frmr.frmrinfo
-#define llc_frmr_pdu0          llc_un.type_frmr.frmrinfo.rej_pdu0
-#define llc_frmr_pdu1          llc_un.type_frmr.frmrinfo.rej_pdu1
-#define llc_frmr_control       llc_un.type_frmr.frmrinfo.frmr_control
-#define llc_frmr_control_ext   llc_un.type_frmr.frmrinfo.frmr_control_ext
-#define llc_frmr_cause         llc_un.type_frmr.frmrinfo.frmr_control_ext
+} __attribute__((__packed__));
+
+struct frmrinfo {
+       u_int8_t frmr_rej_pdu0;
+       u_int8_t frmr_rej_pdu1;
+       u_int8_t frmr_control;
+       u_int8_t frmr_control_ext;
+       u_int8_t frmr_cause;
+} __attribute__((__packed__));
+
+#define        llc_control             llc_un.type_u.control
+#define        llc_control_ext         llc_un.type_raw.control_ext
+#define        llc_fid                 llc_un.type_u.format_id
+#define        llc_class               llc_un.type_u.class_id
+#define        llc_window              llc_un.type_u.window_x2
+#define        llc_frmrinfo            llc_un.type_frmr.frmr_rej_pdu0
+#define        llc_frmr_pdu0           llc_un.type_frmr.frmr_rej_pdu0
+#define        llc_frmr_pdu1           llc_un.type_frmr.frmr_rej_pdu1
+#define        llc_frmr_control        llc_un.type_frmr.frmr_control
+#define        llc_frmr_control_ext    llc_un.type_frmr.frmr_control_ext
+#define        llc_frmr_cause          llc_un.type_frmr.frmr_cause
+#define        llc_snap                llc_un.type_snap
 
 /*
  * Don't use sizeof(struct llc_un) for LLC header sizes
@@ -129,6 +137,7 @@ struct llc {
 #define LLC_ISFRAMELEN 4
 #define LLC_UFRAMELEN  3
 #define LLC_FRMRLEN    7
+#define LLC_SNAPFRAMELEN 8
 
 /*
  * Unnumbered LLC format commands
@@ -165,8 +174,22 @@ struct llc {
 /*
  * ISO PDTR 10178 contains among others
  */
+#define        LLC_8021D_LSAP  0x42
 #define LLC_X25_LSAP   0x7e
 #define LLC_SNAP_LSAP  0xaa
 #define LLC_ISO_LSAP   0xfe
 
-#endif
+/*
+ * LLC XID definitions from 802.2, as needed
+ */
+
+#define LLC_XID_FORMAT_BASIC   0x81
+#define LLC_XID_BASIC_MINLEN   (LLC_UFRAMELEN + 3)
+
+#define LLC_XID_CLASS_I        0x1
+#define LLC_XID_CLASS_II       0x3
+#define LLC_XID_CLASS_III      0x5
+#define LLC_XID_CLASS_IV       0x7
+
+
+#endif /* !_NET_IF_LLC_H_ */
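
The repacked struct and the new LLC_SNAPFRAMELEN make SNAP classification straightforward. A hedged sketch, not from the commit; LLC_UI is the standard unnumbered-information control value defined earlier in this header:

#include <stddef.h>
#include <net/if_llc.h>

static int
llc_is_snap(const struct llc *l, size_t len)
{
        /* SNAP: DSAP/SSAP 0xAA, UI control, 8 header bytes before payload */
        return (len >= LLC_SNAPFRAMELEN &&
            l->llc_dsap == LLC_SNAP_LSAP &&
            l->llc_ssap == LLC_SNAP_LSAP &&
            l->llc_control == LLC_UI);
}
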
index a8b580130dc755899a07d154ee7e7ca02e2c84a7..4eced169bd24793b197a0c18fbaa01849731dcb3 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #define        IFT_L2VLAN      0x87            /* Layer 2 Virtual LAN using 802.1Q */
 #define IFT_IEEE8023ADLAG 0x88         /* IEEE802.3ad Link Aggregate */
 #define        IFT_IEEE1394    0x90            /* IEEE1394 High Performance SerialBus*/
+#define IFT_BRIDGE     0xd1            /* Transparent bridge interface */
 
 /*
  * These are not based on IANA assignments:
index 67d52d0a22ca1456231e3e911c51d5994001d5f6..0601b78729a0384523eb482db6eaba8261a78b14 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -487,6 +487,7 @@ struct ifnet {
        void            *if_fwd_route_lock;
 #endif
        struct route    if_fwd_route;   /* cached IPv4 forwarding route */
+       void    *if_bridge;             /* bridge glue */
 };
 
 #ifndef __APPLE__
index e1be1efd0326aeb7483b7e89176884f3e2e202fb..8ebcfb84189bd7e5d08f00482dfc41fed2f2c50a 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2008 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2003-2009 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <net/multicast_list.h>
 #include <net/ether_if_module.h>
 
-#define        IF_MAXUNIT              0x7fff  /* historical value */
-
 #define VLANNAME       "vlan"
 
 typedef int (bpf_callback_func)(struct ifnet *, struct mbuf *);
index cbc32f35dd242ed8e472dc27e33deac951a3fc61..5529d8056cf39b798a45afdf5d5e99e7a60cf246 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -26,7 +26,7 @@
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 
-/*     $apfw: pf.c,v 1.37 2008/12/05 23:10:20 jhw Exp $ */
+/*     $apfw: git commit 7c8016ea91f7b68950cf41729c92dd8e3e423ba7 $ */
 /*     $OpenBSD: pf.c,v 1.567 2008/02/20 23:40:13 henning Exp $ */
 
 /*
@@ -272,7 +272,7 @@ static int           pf_test_state_tcp(struct pf_state **, int,
                            void *, struct pf_pdesc *, u_short *);
 static int              pf_test_state_udp(struct pf_state **, int,
                            struct pfi_kif *, struct mbuf *, int,
-                           void *, struct pf_pdesc *);
+                           void *, struct pf_pdesc *, u_short *);
 static int              pf_test_state_icmp(struct pf_state **, int,
                            struct pfi_kif *, struct mbuf *, int,
                            void *, struct pf_pdesc *, u_short *);
@@ -469,22 +469,32 @@ pf_state_lookup_aux(struct pf_state **state, struct pfi_kif *kif,
 #define BOUND_IFACE(r, k) \
        ((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all
 
-#define STATE_INC_COUNTERS(s)                          \
-       do {                                            \
-               s->rule.ptr->states++;                  \
-               if (s->anchor.ptr != NULL)              \
-                       s->anchor.ptr->states++;        \
-               if (s->nat_rule.ptr != NULL)            \
-                       s->nat_rule.ptr->states++;      \
+#define STATE_INC_COUNTERS(s)                                  \
+       do {                                                    \
+               s->rule.ptr->states++;                          \
+               VERIFY(s->rule.ptr->states != 0);               \
+               if (s->anchor.ptr != NULL) {                    \
+                       s->anchor.ptr->states++;                \
+                       VERIFY(s->anchor.ptr->states != 0);     \
+               }                                               \
+               if (s->nat_rule.ptr != NULL) {                  \
+                       s->nat_rule.ptr->states++;              \
+                       VERIFY(s->nat_rule.ptr->states != 0);   \
+               }                                               \
        } while (0)
 
-#define STATE_DEC_COUNTERS(s)                          \
-       do {                                            \
-               if (s->nat_rule.ptr != NULL)            \
-                       s->nat_rule.ptr->states--;      \
-               if (s->anchor.ptr != NULL)              \
-                       s->anchor.ptr->states--;        \
-               s->rule.ptr->states--;                  \
+#define STATE_DEC_COUNTERS(s)                                  \
+       do {                                                    \
+               if (s->nat_rule.ptr != NULL) {                  \
+                       VERIFY(s->nat_rule.ptr->states > 0);    \
+                       s->nat_rule.ptr->states--;              \
+               }                                               \
+               if (s->anchor.ptr != NULL) {                    \
+                       VERIFY(s->anchor.ptr->states > 0);      \
+                       s->anchor.ptr->states--;                \
+               }                                               \
+               VERIFY(s->rule.ptr->states > 0);                \
+               s->rule.ptr->states--;                          \
        } while (0)
 
 static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
@@ -512,8 +522,8 @@ RB_GENERATE(pf_state_tree_id, pf_state,
 #define        PF_DT_SKIP_EXTGWY       0x02
 
 #ifndef NO_APPLE_EXTENSIONS
-static const u_int16_t PF_PPTP_PORT = htons(1723);
-static const u_int32_t PF_PPTP_MAGIC_NUMBER = htonl(0x1A2B3C4D);
+static const u_int16_t PF_PPTP_PORT = 1723;
+static const u_int32_t PF_PPTP_MAGIC_NUMBER = 0x1A2B3C4D;
 
 struct pf_pptp_hdr {
        u_int16_t       length;
@@ -762,7 +772,7 @@ struct pf_grev1_hdr {
        */
 };
 
-static const u_int16_t PF_IKE_PORT = htons(500);
+static const u_int16_t PF_IKE_PORT = 500;
 
 struct pf_ike_hdr {
        u_int64_t initiator_cookie, responder_cookie;
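
The constant change here sets a convention used throughout the rest of this diff: well-known ports and magic numbers are now stored in host byte order, and the packet field is converted with ntohs()/ntohl() at each comparison instead. A short illustration, not from the commit, of why the two spellings match the same packets:

u_int16_t uh_sport = htons(500);                /* field as it sits in the packet */
int old_match = (uh_sport == htons(500));       /* old: pre-swapped constant */
int new_match = (ntohs(uh_sport) == 500);       /* new: swap at the comparison */
/* old_match and new_match agree for every packet */
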
@@ -1351,6 +1361,7 @@ pf_src_connlimit(struct pf_state **state)
        int bad = 0;
 
        (*state)->src_node->conn++;
+       VERIFY((*state)->src_node->conn != 0);
        (*state)->src.tcp_est = 1;
        pf_add_threshold(&(*state)->src_node->conn_rate);
 
@@ -1612,6 +1623,7 @@ pf_insert_state(struct pfi_kif *kif, struct pf_state *s)
        TAILQ_INSERT_TAIL(&state_list, s, entry_list);
        pf_status.fcounters[FCNT_STATE_INSERT]++;
        pf_status.states++;
+       VERIFY(pf_status.states != 0);
        pfi_kif_ref(kif, PFI_KIF_REF_STATE);
 #if NPFSYNC
        pfsync_insert_state(s);
@@ -1751,8 +1763,11 @@ pf_src_tree_remove_state(struct pf_state *s)
        lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
 
        if (s->src_node != NULL) {
-               if (s->src.tcp_est)
+               if (s->src.tcp_est) {
+                       VERIFY(s->src_node->conn > 0);
                        --s->src_node->conn;
+               }
+               VERIFY(s->src_node->states > 0);
                if (--s->src_node->states <= 0) {
                        t = s->rule.ptr->timeout[PFTM_SRC_NODE];
                        if (!t)
@@ -1761,6 +1776,7 @@ pf_src_tree_remove_state(struct pf_state *s)
                }
        }
        if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
+               VERIFY(s->nat_src_node->states > 0);
                if (--s->nat_src_node->states <= 0) {
                        t = s->rule.ptr->timeout[PFTM_SRC_NODE];
                        if (!t)
@@ -1819,16 +1835,21 @@ pf_free_state(struct pf_state *cur)
                return;
 #endif
        VERIFY(cur->timeout == PFTM_UNLINKED);
+       VERIFY(cur->rule.ptr->states > 0);
        if (--cur->rule.ptr->states <= 0 &&
            cur->rule.ptr->src_nodes <= 0)
                pf_rm_rule(NULL, cur->rule.ptr);
-       if (cur->nat_rule.ptr != NULL)
+       if (cur->nat_rule.ptr != NULL) {
+               VERIFY(cur->nat_rule.ptr->states > 0);
                if (--cur->nat_rule.ptr->states <= 0 &&
                    cur->nat_rule.ptr->src_nodes <= 0)
                        pf_rm_rule(NULL, cur->nat_rule.ptr);
-       if (cur->anchor.ptr != NULL)
+       }
+       if (cur->anchor.ptr != NULL) {
+               VERIFY(cur->anchor.ptr->states > 0);
                if (--cur->anchor.ptr->states <= 0)
                        pf_rm_rule(NULL, cur->anchor.ptr);
+       }
        pf_normalize_tcp_cleanup(cur);
        pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE);
        TAILQ_REMOVE(&state_list, cur, entry_list);
@@ -1836,6 +1857,7 @@ pf_free_state(struct pf_state *cur)
                pf_tag_unref(cur->tag);
        pool_put(&pf_state_pl, cur);
        pf_status.fcounters[FCNT_STATE_REMOVALS]++;
+       VERIFY(pf_status.states > 0);
        pf_status.states--;
 }
 
@@ -3335,8 +3357,8 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
        unsigned int cut;
        sa_family_t af = pd->af;
        u_int8_t proto = pd->proto;
-       unsigned int low = ntohs(r->rpool.proxy_port[0]);
-       unsigned int high = ntohs(r->rpool.proxy_port[1]);
+       unsigned int low = r->rpool.proxy_port[0];
+       unsigned int high = r->rpool.proxy_port[1];
 #else
        u_int16_t               cut;
 #endif
@@ -3358,7 +3380,7 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
        if (proto == IPPROTO_UDP) {
 
                /*--- Never float IKE source port ---*/
-               if (sxport->port == PF_IKE_PORT) {
+               if (ntohs(sxport->port) == PF_IKE_PORT) {
                        nxport->port = sxport->port;
                        return (0);
                }
@@ -3387,9 +3409,30 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
                                return (0);
                        }
                }
+       } else if (proto == IPPROTO_TCP) {
+               struct pf_state* s;
+               /*
+                * APPLE MODIFICATION: <rdar://problem/6546358>
+                * Fix allows....NAT to use a single binding for TCP session
+                * with same source IP and source port
+                */
+               TAILQ_FOREACH(s, &state_list, entry_list) {
+                       struct pf_state_key* sk = s->state_key;
+                       if (!sk)
+                               continue;
+                       if (s->nat_rule.ptr != r)
+                               continue;
+                       if (sk->proto != IPPROTO_TCP || sk->af != af)
+                                continue;
+                       if (sk->lan.xport.port != sxport->port)
+                               continue;
+                       if (!(PF_AEQ(&sk->lan.addr, saddr, af)))
+                               continue;
+                       nxport->port = sk->gwy.xport.port;
+                       return (0);
+               }
        }
 #endif
-
        do {
                key.af = af;
                key.proto = proto;
@@ -3411,7 +3454,6 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
 #else
                key.ext.port = dport;
 #endif
-
                /*
                 * port search; start random, step;
                 * similar 2 portloop in in_pcbbind
@@ -3577,8 +3619,8 @@ pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
                    src->neg, kif))
                        r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
                            PF_SKIP_DST_ADDR].ptr;
-               else if (!pf_match_xport(r->proto, r->proto_variant, &src->xport,
-                       sxport))
+               else if (!pf_match_xport(r->proto,
+                   r->proto_variant, &src->xport, sxport))
 #else
                else if (PF_MISMATCHAW(&src->addr, saddr, pd->af,
                    src->neg, kif))
@@ -3945,12 +3987,42 @@ pf_socket_lookup(int direction, struct pf_pdesc *pd)
        case AF_INET:
                inp = in_pcblookup_hash(pi, saddr->v4, sport, daddr->v4, dport,
                    0, NULL);
+#if INET6
+               if (inp == NULL) {
+                       struct in6_addr s6, d6;
+
+                       memset(&s6, 0, sizeof (s6));
+                       s6.s6_addr16[5] = htons(0xffff);
+                       memcpy(&s6.s6_addr32[3], &saddr->v4,
+                           sizeof (saddr->v4));
+
+                       memset(&d6, 0, sizeof (d6));
+                       d6.s6_addr16[5] = htons(0xffff);
+                       memcpy(&d6.s6_addr32[3], &daddr->v4,
+                           sizeof (daddr->v4));
+
+                       inp = in6_pcblookup_hash(pi, &s6, sport,
+                           &d6, dport, 0, NULL);
+                       if (inp == NULL) {
+                               inp = in_pcblookup_hash(pi, saddr->v4, sport,
+                                   daddr->v4, dport, INPLOOKUP_WILDCARD, NULL);
+                               if (inp == NULL) {
+                                       inp = in6_pcblookup_hash(pi, &s6, sport,
+                                           &d6, dport, INPLOOKUP_WILDCARD,
+                                           NULL);
+                                       if (inp == NULL)
+                                               return (-1);
+                               }
+                       }
+               }
+#else
                if (inp == NULL) {
                        inp = in_pcblookup_hash(pi, saddr->v4, sport,
                            daddr->v4, dport, INPLOOKUP_WILDCARD, NULL);
                        if (inp == NULL)
                                return (-1);
                }
+#endif /* !INET6 */
                break;
 #endif /* INET */
 #if INET6
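
The new fallback retries an AF_INET lookup against AF_INET6 sockets by building v4-mapped addresses. For reference, a standalone sketch of the mapping, illustration only, using the same kernel-private s6_addr16/s6_addr32 accessors as the code above:

struct in6_addr s6;
struct in_addr v4 = { .s_addr = htonl(0xc0a80101) };    /* 192.168.1.1 */

memset(&s6, 0, sizeof (s6));
s6.s6_addr16[5] = htons(0xffff);
memcpy(&s6.s6_addr32[3], &v4, sizeof (v4));
/* s6 now holds ::ffff:192.168.1.1, the v4-mapped form an AF_INET6
   socket sees for an IPv4 peer */
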
@@ -4983,8 +5055,8 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
                        struct udphdr *uh = pd->hdr.udp;
                        size_t plen = m->m_pkthdr.len - off - sizeof (*uh);
 
-                       if (uh->uh_sport == PF_IKE_PORT &&
-                           uh->uh_dport == PF_IKE_PORT &&
+                       if (ntohs(uh->uh_sport) == PF_IKE_PORT &&
+                           ntohs(uh->uh_dport) == PF_IKE_PORT &&
                            plen >= PF_IKE_PACKET_MINSIZE) {
                                if (plen > PF_IKE_PACKET_MINSIZE)
                                        plen = PF_IKE_PACKET_MINSIZE;
@@ -5154,11 +5226,13 @@ cleanup:
                if (sn != NULL) {
                        s->src_node = sn;
                        s->src_node->states++;
+                       VERIFY(s->src_node->states != 0);
                }
                if (nsn != NULL) {
                        PF_ACPY(&nsn->raddr, &pd->naddr, af);
                        s->nat_src_node = nsn;
                        s->nat_src_node->states++;
+                       VERIFY(s->nat_src_node->states != 0);
                }
                if (pd->proto == IPPROTO_TCP) {
                        if ((pd->flags & PFDESC_TCP_NORM) &&
@@ -5195,8 +5269,8 @@ cleanup:
                sk->af = af;
 #ifndef NO_APPLE_EXTENSIONS
                if (pd->proto == IPPROTO_UDP) {
-                       if (pd->hdr.udp->uh_sport == PF_IKE_PORT &&
-                           pd->hdr.udp->uh_dport == PF_IKE_PORT) {
+                       if (ntohs(pd->hdr.udp->uh_sport) == PF_IKE_PORT &&
+                           ntohs(pd->hdr.udp->uh_dport) == PF_IKE_PORT) {
                                sk->proto_variant = PF_EXTFILTER_APD;
                        } else {
                                sk->proto_variant = nr ? nr->extfilter :
@@ -5323,7 +5397,8 @@ cleanup:
                                u_int16_t dport = (direction == PF_OUT) ?
                                    sk->ext.xport.port : sk->gwy.xport.port;
 
-                               if (nr != NULL && dport == PF_PPTP_PORT) {
+                               if (nr != NULL &&
+                                   ntohs(dport) == PF_PPTP_PORT) {
                                        struct pf_app_state *as;
 
                                        as = pool_get(&pf_app_state_pl,
@@ -5349,8 +5424,9 @@ cleanup:
                        case IPPROTO_UDP: {
                                struct udphdr *uh = pd->hdr.udp;
 
-                               if (nr != NULL && uh->uh_sport == PF_IKE_PORT &&
-                                   uh->uh_dport == PF_IKE_PORT) {
+                               if (nr != NULL &&
+                                   ntohs(uh->uh_sport) == PF_IKE_PORT &&
+                                   ntohs(uh->uh_dport) == PF_IKE_PORT) {
                                        struct pf_app_state *as;
 
                                        as = pool_get(&pf_app_state_pl,
@@ -5614,9 +5690,9 @@ pf_pptp_handler(struct pf_state *s, int direction, int off,
        as = &s->state_key->app_state->u.pptp;
        m_copydata(m, off, plen, &cm);
 
-       if (cm.hdr.magic != PF_PPTP_MAGIC_NUMBER)
+       if (ntohl(cm.hdr.magic) != PF_PPTP_MAGIC_NUMBER)
                return;
-       if (cm.hdr.type != htons(1))
+       if (ntohs(cm.hdr.type) != 1)
                return;
 
        sk = s->state_key;
@@ -5659,6 +5735,7 @@ pf_pptp_handler(struct pf_state *s, int direction, int off,
                gsk->gwy.xport.call_id = 0;
                gsk->ext.xport.call_id = 0;
 
+               STATE_INC_COUNTERS(gs);
                as->grev1_state = gs;
        } else {
                gsk = gs->state_key;
@@ -5816,8 +5893,12 @@ pf_pptp_handler(struct pf_state *s, int direction, int off,
                }
 
                m = pf_lazy_makewritable(pd, m, off + plen);
-               if (!m)
+               if (!m) {
+                       as->grev1_state = NULL;
+                       STATE_DEC_COUNTERS(gs);
+                       pool_put(&pf_state_pl, gs);
                        return;
+               }
                m_copyback(m, off, plen, &cm);
        }
 
@@ -5835,8 +5916,14 @@ pf_pptp_handler(struct pf_state *s, int direction, int off,
                gs->creation = pf_time_second();
                gs->expire = pf_time_second();
                gs->timeout = PFTM_GREv1_FIRST_PACKET;
-               if (gs->src_node) ++gs->src_node->states;
-               if (gs->nat_src_node) ++gs->nat_src_node->states;
+               if (gs->src_node != NULL) {
+                       ++gs->src_node->states;
+                       VERIFY(gs->src_node->states != 0);
+               }
+               if (gs->nat_src_node != NULL) {
+                       ++gs->nat_src_node->states;
+                       VERIFY(gs->nat_src_node->states != 0);
+               }
                pf_set_rt_ifp(gs, &sk->lan.addr);
                if (pf_insert_state(BOUND_IFACE(s->rule.ptr, kif), gs)) {
 
@@ -5851,7 +5938,8 @@ pf_pptp_handler(struct pf_state *s, int direction, int off,
                         * succeed.  Failures are expected to be rare enough
                         * that fixing this is a low priority.
                         */
-
+                       as->grev1_state = NULL;
+                       pd->lmw = -1;
                        pf_src_tree_remove_state(gs);
                        STATE_DEC_COUNTERS(gs);
                        pool_put(&pf_state_pl, gs);
@@ -6105,9 +6193,27 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
                                            >> sws;
                                        dws = dst->wscale & PF_WSCALE_MASK;
                                } else {
+#ifndef NO_APPLE_MODIFICATION
+                                       /*
+                                        * <rdar://5786370>
+                                        *
+                                        * Window scale negotiation has failed,
+                                        * therefore we must restore the window
+                                        * scale in the state record that we
+                                        * optimistically removed in
+                                        * pf_test_rule().  Care is required to
+                                        * prevent arithmetic overflow from
+                                        * zeroing the window when it's
+                                        * truncated down to 16-bits.   --jhw
+                                        */
+                                       u_int32_t _win = dst->max_win;
+                                       _win <<= dst->wscale & PF_WSCALE_MASK;
+                                       dst->max_win = MIN(0xffff, _win);
+#else
                                        /* fixup other window */
                                        dst->max_win <<= dst->wscale &
                                            PF_WSCALE_MASK;
+#endif
                                        /* in case of a retrans SYN|ACK */
                                        dst->wscale = 0;
                                }
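
The overflow the comment above describes is easy to reproduce: shifting a 16-bit max_win and storing the result back into a 16-bit field can truncate to zero, permanently closing the window. A worked illustration, not from the commit, of the widen-then-clamp fix (MIN as in the kernel's sys/param.h):

u_int16_t max_win = 0x4000;                     /* 16384 */
u_int8_t wscale = 2;

u_int16_t wrong = max_win << wscale;            /* 0x10000 truncates to 0 */
u_int32_t _win = (u_int32_t)max_win << wscale;  /* kept as 0x10000 */
u_int16_t right = MIN(0xffff, _win);            /* clamps to 0xffff */
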
@@ -6125,9 +6231,16 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
                 * the crappy stack check or if we picked up the connection
                 * after establishment)
                 */
+#ifndef NO_APPLE_MODIFICATIONS
+               if (src->seqhi == 1 ||
+                   SEQ_GEQ(end + MAX(1, (u_int32_t)dst->max_win << dws),
+                   src->seqhi))
+                       src->seqhi = end + MAX(1, (u_int32_t)dst->max_win << dws);
+#else
                if (src->seqhi == 1 ||
                    SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
                        src->seqhi = end + MAX(1, dst->max_win << dws);
+#endif
                if (win > src->max_win)
                        src->max_win = win;
 
@@ -6201,7 +6314,11 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
 #define MAXACKWINDOW (0xffff + 1500)   /* 1500 is an arbitrary fudge factor */
        if (SEQ_GEQ(src->seqhi, end) &&
            /* Last octet inside other's window space */
+#ifndef NO_APPLE_MODIFICATIONS
+           SEQ_GEQ(seq, src->seqlo - ((u_int32_t)dst->max_win << dws)) &&
+#else
            SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
+#endif
            /* Retrans: not more than one window back */
            (ackskew >= -MAXACKWINDOW) &&
            /* Acking not more than one reassembled fragment backwards */
@@ -6229,9 +6346,13 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
                if (SEQ_GT(end, src->seqlo))
                        src->seqlo = end;
                /* slide the window of what the other end can send */
+#ifndef NO_APPLE_MODIFICATIONS
+               if (SEQ_GEQ(ack + ((u_int32_t)win << sws), dst->seqhi))
+                       dst->seqhi = ack + MAX(((u_int32_t)win << sws), 1);
+#else
                if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
                        dst->seqhi = ack + MAX((win << sws), 1);
-
+#endif
 
                /* update states */
                if (th->th_flags & TH_SYN)
@@ -6331,8 +6452,13 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
                if (SEQ_GT(end, src->seqlo))
                        src->seqlo = end;
                /* slide the window of what the other end can send */
+#ifndef NO_APPLE_MODIFICATIONS
+               if (SEQ_GEQ(ack + ((u_int32_t)win << sws), dst->seqhi))
+                       dst->seqhi = ack + MAX(((u_int32_t)win << sws), 1);
+#else
                if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
                        dst->seqhi = ack + MAX((win << sws), 1);
+#endif
 
                /*
                 * Cannot set dst->seqhi here since this could be a shotgunned
@@ -6374,7 +6500,12 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
                            "fwd" : "rev");
                        printf("pf: State failure on: %c %c %c %c | %c %c\n",
                            SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
+#ifndef NO_APPLE_MODIFICATIONS
+                           SEQ_GEQ(seq,
+                           src->seqlo - ((u_int32_t)dst->max_win << dws)) ?
+#else
                            SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
+#endif
                            ' ': '2',
                            (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
                            (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
@@ -6447,7 +6578,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
 
 static int
 pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
-    struct mbuf *m, int off, void *h, struct pf_pdesc *pd)
+    struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason)
 {
 #pragma unused(h)
        struct pf_state_peer    *src, *dst;
@@ -6487,7 +6618,8 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
        }
 
 #ifndef NO_APPLE_EXTENSIONS
-       if (uh->uh_sport == PF_IKE_PORT && uh->uh_dport == PF_IKE_PORT) {
+       if (ntohs(uh->uh_sport) == PF_IKE_PORT &&
+           ntohs(uh->uh_dport) == PF_IKE_PORT) {
                struct pf_ike_hdr ike;
                size_t plen = m->m_pkthdr.len - off - sizeof (*uh);
                if (plen < PF_IKE_PACKET_MINSIZE) {
@@ -6570,6 +6702,10 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
            (*state)->state_key->app_state->handler) {
                (*state)->state_key->app_state->handler(*state, direction,
                    off + uh->uh_ulen, pd, kif);
+               if (pd->lmw < 0) {
+                       REASON_SET(reason, PFRES_MEMORY);
+                       return (PF_DROP);
+               }
                m = pd->mp;
        }
 #endif
@@ -6968,7 +7104,12 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
                        }
 
                        if (!SEQ_GEQ(src->seqhi, seq) ||
+#ifndef NO_APPLE_MODIFICATION
+                           !SEQ_GEQ(seq,
+                           src->seqlo - ((u_int32_t)dst->max_win << dws))) {
+#else
                            !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws))) {
+#endif
                                if (pf_status.debug >= PF_DEBUG_MISC) {
                                        printf("pf: BAD ICMP %d:%d ",
                                            icmptype, pd->hdr.icmp->icmp_code);
@@ -7081,8 +7222,8 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
 #ifndef NO_APPLE_EXTENSIONS
                        key.proto_variant = PF_EXTFILTER_APD;
 
-                       if (uh.uh_sport == PF_IKE_PORT &&
-                           uh.uh_dport == PF_IKE_PORT) {
+                       if (ntohs(uh.uh_sport) == PF_IKE_PORT &&
+                           ntohs(uh.uh_dport) == PF_IKE_PORT) {
                                struct pf_ike_hdr ike;
                                size_t plen =
                                    m->m_pkthdr.len - off2 - sizeof (uh);
@@ -8330,8 +8471,6 @@ pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p,
                        h = mtod(m, struct ip *);               \
                }                                               \
        } while (0)
-#else
-#define PF_APPLE_UPDATE_PDESC_IPv4()
 #endif
 
 int
@@ -8439,9 +8578,13 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
                if ((th.th_flags & TH_ACK) && pd.p_len == 0)
                        pqid = 1;
                action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
-               if (action == PF_DROP)
+#ifndef NO_APPLE_EXTENSIONS
+               if (pd.lmw < 0)
                        goto done;
                PF_APPLE_UPDATE_PDESC_IPv4();
+#endif
+               if (action == PF_DROP)
+                       goto done;
                action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
                    &reason);
 #ifndef NO_APPLE_EXTENSIONS
@@ -8478,7 +8621,8 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
                        REASON_SET(&reason, PFRES_SHORT);
                        goto done;
                }
-               action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
+               action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd,
+                   &reason);
 #ifndef NO_APPLE_EXTENSIONS
                if (pd.lmw < 0)
                        goto done;
@@ -8614,7 +8758,10 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
        }
 
 done:
+#ifndef NO_APPLE_EXTENSIONS
+       *m0 = pd.mp;
        PF_APPLE_UPDATE_PDESC_IPv4();
+#endif
 
        if (action == PF_PASS && h->ip_hl > 5 &&
            !((s && s->allow_opts) || r->allow_opts)) {
@@ -8732,8 +8879,15 @@ done:
        }
 
 #ifndef NO_APPLE_EXTENSIONS
+       VERIFY(m == NULL || pd.mp == NULL || pd.mp == m);
+
        if (*m0) {
                if (pd.lmw < 0) {
+                       REASON_SET(&reason, PFRES_MEMORY);
+                       action = PF_DROP;
+               }
+
+               if (action == PF_DROP) {
                        m_freem(*m0);
                        *m0 = NULL;
                        return (PF_DROP);
@@ -8766,8 +8920,6 @@ done:
                        h = mtod(m, struct ip6_hdr *);          \
                }                                               \
        } while (0)
-#else
-#define PF_APPLE_UPDATE_PDESC_IPv6()
 #endif
 
 int
@@ -8944,9 +9096,13 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
                }
                pd.p_len = pd.tot_len - off - (th.th_off << 2);
                action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
-               if (action == PF_DROP)
+#ifndef NO_APPLE_EXTENSIONS
+               if (pd.lmw < 0)
                        goto done;
                PF_APPLE_UPDATE_PDESC_IPv6();
+#endif
+               if (action == PF_DROP)
+                       goto done;
                action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
                    &reason);
 #ifndef NO_APPLE_EXTENSIONS
@@ -8983,7 +9139,8 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
                        REASON_SET(&reason, PFRES_SHORT);
                        goto done;
                }
-               action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
+               action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd,
+                   &reason);
 #ifndef NO_APPLE_EXTENSIONS
                if (pd.lmw < 0)
                        goto done;
@@ -9120,7 +9277,10 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
        }
 
 done:
+#ifndef NO_APPLE_EXTENSIONS
+       *m0 = pd.mp;
        PF_APPLE_UPDATE_PDESC_IPv6();
+#endif
 
        if (n != m) {
                m_freem(n);
@@ -9246,8 +9406,15 @@ done:
                pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd);
 #else
 #ifndef NO_APPLE_EXTENSIONS
+       VERIFY(m == NULL || pd.mp == NULL || pd.mp == m);
+
        if (*m0) {
                if (pd.lmw < 0) {
+                       REASON_SET(&reason, PFRES_MEMORY);
+                       action = PF_DROP;
+               }
+
+               if (action == PF_DROP) {
                        m_freem(*m0);
                        *m0 = NULL;
                        return (PF_DROP);
@@ -9411,6 +9578,15 @@ pf_time_second(void)
 {
        struct timeval t;
 
+       microuptime(&t);
+       return (t.tv_sec);
+}
+
+uint64_t
+pf_calendar_time_second(void)
+{
+       struct timeval t;
+
        microtime(&t);
        return (t.tv_sec);
 }
diff --git a/bsd/net/pf_ioctl.c b/bsd/net/pf_ioctl.c
index 8145fed94130e505e5ea0da31077824c506e1005..5b8461e375d0dd38cb5e601f79cecdf658ecea86 100644
@@ -1329,7 +1329,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                        error = ENOMEM;
                } else {
                        pf_status.running = 1;
-                       pf_status.since = pf_time_second();
+                       pf_status.since = pf_calendar_time_second();
                        if (pf_status.stateid == 0) {
                                pf_status.stateid = pf_time_second();
                                pf_status.stateid = pf_status.stateid << 32;
@@ -1348,7 +1348,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                        mbuf_growth_normal();
                        pf_detach_hooks();
                        pf_status.running = 0;
-                       pf_status.since = pf_time_second();
+                       pf_status.since = pf_calendar_time_second();
                        wakeup(pf_purge_thread_fn);
                        DPFPRINTF(PF_DEBUG_MISC, ("pf: stopped\n"));
                }
@@ -1922,6 +1922,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                        break;
                }
                pf_default_rule.states++;
+               VERIFY(pf_default_rule.states != 0);
                break;
        }
 
@@ -2007,7 +2008,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                bzero(pf_status.counters, sizeof (pf_status.counters));
                bzero(pf_status.fcounters, sizeof (pf_status.fcounters));
                bzero(pf_status.scounters, sizeof (pf_status.scounters));
-               pf_status.since = pf_time_second();
+               pf_status.since = pf_calendar_time_second();
                if (*pf_status.ifname)
                        pfi_update_status(pf_status.ifname, NULL);
                break;
diff --git a/bsd/net/pfvar.h b/bsd/net/pfvar.h
index 60deece57d49b2a4aa5d7fd31268c9a896239639..b8bdb0034359296c60c644c524c019eaa52a38f4 100644
@@ -118,6 +118,7 @@ __private_extern__ void pool_sethardlimit(struct pool *, int,
 __private_extern__ void *pool_get(struct pool *, int);
 __private_extern__ void pool_put(struct pool *, void *);
 __private_extern__ u_int64_t pf_time_second(void);
+__private_extern__ u_int64_t pf_calendar_time_second(void);
 #endif /* KERNEL */
 
 union sockaddr_union {
diff --git a/bsd/net/route.c b/bsd/net/route.c
index 937341b00a139fbc0d60355a646248c78b3196df..df3b53ba3008191928749e41b87bc339716ce902 100644
@@ -1208,6 +1208,8 @@ rtioctl(unsigned long req, caddr_t data, struct proc *p)
 #if INET && MROUTING
        return mrt_ioctl(req, data);
 #else
+#pragma unused(req)
+#pragma unused(data)
        return ENXIO;
 #endif
 }
diff --git a/bsd/netinet/in_arp.c b/bsd/netinet/in_arp.c
index 6897e77ac87492b9b26765821083930e7db20a02..c553930f1b13fde46a8c263eac47dd2b8837f6f4 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -74,6 +74,7 @@
 #include <net/if_arp.h>
 #include <net/if_dl.h>
 #include <net/dlil.h>
+#include <net/if_types.h>
 #include <net/route.h>
 #include <netinet/if_ether.h>
 #include <netinet/in_var.h>
@@ -937,34 +938,64 @@ arp_ip_handle_input(
        struct llinfo_arp *llinfo;
        errno_t error;
        int created_announcement = 0;
-
+       int bridged = 0, is_bridge = 0;
+       
        /* Do not respond to requests for 0.0.0.0 */
        if (target_ip->sin_addr.s_addr == 0 && arpop == ARPOP_REQUEST)
                goto done;
+       
+       if (ifp->if_bridge)
+               bridged = 1;
+       if (ifp->if_type == IFT_BRIDGE)
+               is_bridge = 1;
 
        /*
         * Determine if this ARP is for us
+        * For a bridge, we want to check the address irrespective 
+        * of the receive interface.
         */
        lck_rw_lock_shared(in_ifaddr_rwlock);
        TAILQ_FOREACH(ia, INADDR_HASH(target_ip->sin_addr.s_addr), ia_hash) {
-               /* do_bridge should be tested here for bridging */
-               if (ia->ia_ifp == ifp &&
+               if (((bridged && ia->ia_ifp->if_bridge != NULL) ||
+                       (ia->ia_ifp == ifp)) &&
                    ia->ia_addr.sin_addr.s_addr == target_ip->sin_addr.s_addr) {
-                       best_ia = ia;
-                       ifaref(&best_ia->ia_ifa);
-                       lck_rw_done(in_ifaddr_rwlock);
-                       goto match;
+                               best_ia = ia;
+                               ifaref(&best_ia->ia_ifa);
+                               lck_rw_done(in_ifaddr_rwlock);
+                               goto match;
                }
        }
 
        TAILQ_FOREACH(ia, INADDR_HASH(sender_ip->sin_addr.s_addr), ia_hash) {
-               /* do_bridge should be tested here for bridging */
-               if (ia->ia_ifp == ifp &&
+               if (((bridged && ia->ia_ifp->if_bridge != NULL) ||
+                       (ia->ia_ifp == ifp)) &&
                    ia->ia_addr.sin_addr.s_addr == sender_ip->sin_addr.s_addr) {
-                       best_ia = ia;
-                       ifaref(&best_ia->ia_ifa);
-                       lck_rw_done(in_ifaddr_rwlock);
-                       goto match;
+                               best_ia = ia;
+                               ifaref(&best_ia->ia_ifa);
+                               lck_rw_done(in_ifaddr_rwlock);
+                               goto match;
+               }
+       }
+
+#define BDG_MEMBER_MATCHES_ARP(addr, ifp, ia)                                                          \
+       (ia->ia_ifp->if_bridge == ifp->if_softc &&                                                              \
+       !bcmp(ifnet_lladdr(ia->ia_ifp), ifnet_lladdr(ifp), ifp->if_addrlen) &&  \
+       addr == ia->ia_addr.sin_addr.s_addr)
+       /*
+        * Check the case when bridge shares its MAC address with
+        * some of its children, so packets are claimed by bridge
+        * itself (bridge_input() does it first), but they are really
+        * meant to be destined to the bridge member.
+        */
+       if (is_bridge) {
+               TAILQ_FOREACH(ia, INADDR_HASH(target_ip->sin_addr.s_addr), ia_hash) {
+                       if (BDG_MEMBER_MATCHES_ARP(target_ip->sin_addr.s_addr, ifp, ia)) {
+                               ifp = ia->ia_ifp;
+                               best_ia = ia;
+                               ifaref(&best_ia->ia_ifa);
+                               lck_rw_done(in_ifaddr_rwlock);
+                               goto match;
+                       }
                }
        }
        lck_rw_done(in_ifaddr_rwlock);
@@ -980,12 +1011,16 @@ arp_ip_handle_input(
                        continue;
                best_ia = (struct in_ifaddr *)ifa;
                ifaref(&best_ia->ia_ifa);
-               break;
+               ifnet_lock_done(ifp);
+               goto match;
        }
        ifnet_lock_done(ifp);
 
-       /* If we don't have an IP address on this interface, ignore the packet */
-       if (best_ia == NULL)
+       /*
+        * If we're not a bridge member, or if we are but there's no
+        * IPv4 address to use for the interface, drop the packet.
+        */
+       if (!bridged || best_ia == NULL)
                goto done;
 
 match:
@@ -995,7 +1030,7 @@ match:
        }
 
        /* Check for a conflict */
-       if (sender_ip->sin_addr.s_addr == best_ia->ia_addr.sin_addr.s_addr) {
+       if (!bridged && sender_ip->sin_addr.s_addr == best_ia->ia_addr.sin_addr.s_addr) {
                struct kev_msg        ev_msg;
                struct kev_in_collision *in_collision;
                u_char  storage[sizeof(struct kev_in_collision) + MAX_HW_LEN];
@@ -1152,7 +1187,7 @@ match:
 
        RT_LOCK_ASSERT_HELD(route);
        gateway = SDL(route->rt_gateway);
-       if (route->rt_ifp != ifp) {
+       if (!bridged && route->rt_ifp != ifp) {
                if (!IN_LINKLOCAL(ntohl(sender_ip->sin_addr.s_addr)) || (ifp->if_eflags & IFEF_ARPLL) == 0) {
                        if (log_arp_warnings)
                                log(LOG_ERR, "arp: %s is on %s%d but got reply from %s on %s%d\n",
@@ -1286,6 +1321,19 @@ respond:
 
                if (error == 0) {
                        RT_LOCK_ASSERT_HELD(route);
+                       /*
+                        * Return proxied ARP replies only on the interface
+                        * or bridge cluster where this network resides.
+                        * Otherwise we may conflict with the host we are
+                        * proxying for.
+                        */
+                       if (route->rt_ifp != ifp &&
+                               (route->rt_ifp->if_bridge != ifp->if_bridge ||
+                                ifp->if_bridge == NULL)) {
+                                       RT_REMREF_LOCKED(route);
+                                       RT_UNLOCK(route);
+                                       goto done;
+                               }
                        proxied = *SDL(route->rt_gateway);
                        target_hw = &proxied;
                } else {
diff --git a/bsd/netinet/ip_dummynet.c b/bsd/netinet/ip_dummynet.c
index 090c692bc6830f886c6898fed644d1a12674480c..54fceaef493e98cac7655b5197047abd5cd225b3 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <netinet/ip_dummynet.h>
 #include <netinet/ip_var.h>
 
-#if BRIDGE
-#include <netinet/if_ether.h> /* for struct arpcom */
-#include <net/bridge.h>
-#endif
-
 /*
  * We keep a private variable for the simulation time, but we could
  * probably use an existing one ("softticks" in sys/kern/kern_timer.c)
@@ -1155,28 +1150,6 @@ dummynet_send(struct mbuf *m)
                        proto_inject(PF_INET, m);
                        break ;
        
-#if BRIDGE
-               case DN_TO_BDG_FWD :
-                       /*
-                        * The bridge requires/assumes the Ethernet header is
-                        * contiguous in the first mbuf header.  Insure this is true.
-                        */
-                       if (BDG_LOADED) {
-                       if (m->m_len < ETHER_HDR_LEN &&
-                               (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) {
-                               printf("dummynet/bridge: pullup fail, dropping pkt\n");
-                               break;
-                       }
-                       m = bdg_forward_ptr(m, pkt->ifp);
-                       } else {
-                       /* somebody unloaded the bridge module. Drop pkt */
-                       /* XXX rate limit */
-                       printf("dummynet: dropping bridged packet trapped in pipe\n");
-                       }
-                       if (m)
-                       m_freem(m);
-                       break;
-#endif         
                default:
                        printf("dummynet: bad switch %d!\n", pkt->dn_dir);
                        m_freem(m);
diff --git a/bsd/netinet/ip_output.c b/bsd/netinet/ip_output.c
index eaf005f60cdd87b7532d5731f648a7920e72babd..a989e64e33da6224275720ec668870ec9deeac80 100644
@@ -2633,7 +2633,6 @@ ip_setmoptions(sopt, imop)
        struct ip_moptions **imop;
 {
        int error = 0;
-       int i;
        struct in_addr addr;
        struct ip_mreq mreq;
        struct ifnet *ifp = NULL;
@@ -2654,20 +2653,23 @@ ip_setmoptions(sopt, imop)
        switch (sopt->sopt_name) {
        /* store an index number for the vif you wanna use in the send */
 #if MROUTING
-       case IP_MULTICAST_VIF:
-               if (legal_vif_num == 0) {
-                       error = EOPNOTSUPP;
-                       break;
-               }
-               error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
-               if (error)
-                       break;
-               if (!legal_vif_num(i) && (i != -1)) {
-                       error = EINVAL;
+       case IP_MULTICAST_VIF: 
+               {
+                       int i;
+                       if (legal_vif_num == 0) {
+                               error = EOPNOTSUPP;
+                               break;
+                       }
+                       error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
+                       if (error)
+                               break;
+                       if (!legal_vif_num(i) && (i != -1)) {
+                               error = EINVAL;
+                               break;
+                       }
+                       imo->imo_multicast_vif = i;
                        break;
                }
-               imo->imo_multicast_vif = i;
-               break;
 #endif /* MROUTING */
 
        case IP_MULTICAST_IF:
diff --git a/bsd/netinet/tcp_input.c b/bsd/netinet/tcp_input.c
index e7fda107f4a0ff73bc50891ea0ae101146184f46..a293cc24a90151bc4deba2398aa00e789bd1f5a7 100644
@@ -1007,6 +1007,10 @@ findpcb:
                goto drop;
 #endif
 
+       /* Radar 7377561: Avoid processing packets while closing a listen socket */
+       if (tp->t_state == TCPS_LISTEN && (so->so_options & SO_ACCEPTCONN) == 0) 
+               goto drop;
+
        if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) {
 #if TCPDEBUG
                if (so->so_options & SO_DEBUG) {
@@ -1296,7 +1300,6 @@ findpcb:
                        KERNEL_DEBUG(DBG_FNC_TCP_NEWCONN | DBG_FUNC_END,0,0,0,0,0);
                }
        }
-
 #if 1
        lck_mtx_assert(((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
 #endif
diff --git a/bsd/netinet6/in6.c b/bsd/netinet6/in6.c
index 5fc80e33054070ba0695b7e939c8899197b7551e..a9fd82b9852afff77d2bbe4c9676889e82d186d3 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2003-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -528,14 +528,14 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
        getmicrotime(&timenow);
 
        privileged = (proc_suser(p) == 0);
-
+#if MROUTING
        switch (cmd) {
        case SIOCGETSGCNT_IN6:
        case SIOCGETMIFCNT_IN6_32:
        case SIOCGETMIFCNT_IN6_64:
                return (mrt6_ioctl(cmd, data));
        }
-
+#endif
        if (ifp == NULL)
                return (EOPNOTSUPP);
 
@@ -724,20 +724,9 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
 
        case SIOCPROTOATTACH_IN6_32:
        case SIOCPROTOATTACH_IN6_64:
-               switch (ifp->if_type) {
-#if IFT_BRIDGE /*OpenBSD 2.8*/
-       /* some of the interfaces are inherently not IPv6 capable */
-                       case IFT_BRIDGE:
-                               return;
-                               /* NOTREACHED */
-#endif
-                       default:
-                               if ((error = proto_plumb(PF_INET6, ifp)))
-                                       printf("SIOCPROTOATTACH_IN6: %s "
-                                           "error=%d\n", if_name(ifp), error);
-                               break;
-
-               }
+               if ((error = proto_plumb(PF_INET6, ifp)))
+                       printf("SIOCPROTOATTACH_IN6: %s "
+                                  "error=%d\n", if_name(ifp), error);
                return (error);
                /* NOTREACHED */
 
diff --git a/bsd/netinet6/in6_ifattach.c b/bsd/netinet6/in6_ifattach.c
index dff06569fe54549ac1cc0c036a212fda850d345f..5995b212d0952bab0b0f5b32c956b6299bb77e17 100644
@@ -307,6 +307,7 @@ found:
 #if IFT_IEEE80211
        case IFT_IEEE80211:
 #endif
+       case IFT_BRIDGE:
                /* IEEE802/EUI64 cases - what others? */
                /* IEEE1394 uses 16byte length address starting with EUI64 */
                if (addrlen > 8)
diff --git a/bsd/netinet6/in6_proto.c b/bsd/netinet6/in6_proto.c
index a197c6c6b6fc7be8789c31215a34a26f81c73f0f..d2621dd30700f2afc602d0fbd95174b460d8a78e 100644
@@ -312,6 +312,7 @@ struct ip6protosw inet6sw[] = {
   0,           rip_unlock,     0,
   { 0, 0 }, NULL, { 0 }
 },
+#if MROUTING
 { SOCK_RAW,     &inet6domain,  IPPROTO_PIM,    PR_ATOMIC|PR_ADDR|PR_LASTHDR,
   pim6_input,  rip6_pr_output, 0,              rip6_ctloutput,
   0,
@@ -321,6 +322,17 @@ struct ip6protosw inet6sw[] = {
   0,           rip_unlock,     0,
   { 0, 0 }, NULL, { 0 }
 },
+#else
+{ SOCK_RAW,     &inet6domain,  IPPROTO_PIM,    PR_ATOMIC|PR_ADDR|PR_LASTHDR,
+  0,           0,              0,              rip6_ctloutput,
+  0,
+  0,           0,              0,              0,
+  0,   
+  &rip6_usrreqs,
+  0,           rip_unlock,     0,
+  { 0, 0 }, NULL, { 0 }
+},
+#endif
 /* raw wildcard */
 { SOCK_RAW,    &inet6domain,   0,              PR_ATOMIC|PR_ADDR|PR_LASTHDR,
   rip6_input,  rip6_pr_output, 0,              rip6_ctloutput,
@@ -548,8 +560,10 @@ SYSCTL_INT(_net_inet6_ip6, IPV6CTL_AUTO_LINKLOCAL,
        auto_linklocal, CTLFLAG_RW, &ip6_auto_linklocal,        0, "");
 SYSCTL_STRUCT(_net_inet6_ip6, IPV6CTL_RIP6STATS, rip6stats, CTLFLAG_RD,
        &rip6stat, rip6stat, "");
+#if MROUTING
 SYSCTL_STRUCT(_net_inet6_ip6, OID_AUTO, mrt6stat, CTLFLAG_RD,
         &mrt6stat, mrt6stat, "");
+#endif
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_NEIGHBORGCTHRESH,
        neighborgcthresh, CTLFLAG_RW,   &ip6_neighborgcthresh,  0, "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXIFPREFIXES,
diff --git a/bsd/netinet6/ip6_input.c b/bsd/netinet6/ip6_input.c
index 1c19434ab1fa88cc9a65e92e039a75ca37c87550..cdf3776b2b18ceca165d4f4950af1b774c2a1501 100644
@@ -637,7 +637,11 @@ ip6_input(m)
                ifnet_lock_done(ifp);
                if (in6m)
                        ours = 1;
+#if MROUTING
                else if (!ip6_mrouter) {
+#else
+               else {
+#endif
                        ip6stat.ip6s_notmember++;
                        ip6stat.ip6s_cantforward++;
                        in6_ifstat_inc(ifp, ifs6_in_discard);
@@ -902,12 +906,14 @@ ip6_input(m)
                 * ip6_mforward() returns a non-zero value, the packet
                 * must be discarded, else it may be accepted below.
                 */
+#if MROUTING
                if (ip6_mrouter && ip6_mforward(ip6, m->m_pkthdr.rcvif, m)) {
                        ip6stat.ip6s_cantforward++;
                        m_freem(m);
                        lck_mtx_unlock(ip6_mutex);
                        return;
                }
+#endif
                if (!ours) {
                        m_freem(m);
                        lck_mtx_unlock(ip6_mutex);
diff --git a/bsd/netinet6/ip6_mroute.c b/bsd/netinet6/ip6_mroute.c
index 3f0735c6337aa1f77094c06f946b37a7ac1fa768..da8c4fc96caa50bf7611c8ab27fd1d4e7d47ab0c 100644
@@ -135,6 +135,9 @@ extern lck_mtx_t *ip6_mutex;
 struct socket  *ip6_mrouter  = NULL;
 int            ip6_mrouter_ver = 0;
 int            ip6_mrtproto = IPPROTO_PIM;    /* for netstat only */
+
+#if MROUTING
+
 struct mrt6stat        mrt6stat;
 
 #define NO_RTE_FOUND   0x1
@@ -1905,3 +1908,4 @@ pim6_input(mp, offp)
        rip6_input(&m, offp);
        return(IPPROTO_DONE);
 }
+#endif
diff --git a/bsd/netinet6/ip6_mroute.h b/bsd/netinet6/ip6_mroute.h
index dd50d46bda9870d91067436342472802d68b5b8b..5eef448db9f36b3b93407709bfbb98c952fdea45 100644
@@ -313,6 +313,7 @@ struct rtdetq {             /* XXX: rtdetq is also defined in ip_mroute.h */
 };
 #endif /* _NETINET_IP_MROUTE_H_ */
 
+#if MROUTING
 #ifdef KERNEL_PRIVATE
 extern struct mrt6stat mrt6stat;
 
@@ -322,5 +323,6 @@ extern int ip6_mrouter_done(void);
 extern int mrt6_ioctl(u_long, caddr_t);
 #endif /* KERNEL_PRIVATE */
 #endif /* PRIVATE */
+#endif
 
 #endif /* !_NETINET6_IP6_MROUTE_H_ */
diff --git a/bsd/netinet6/ip6_output.c b/bsd/netinet6/ip6_output.c
index e426933c6f8bf16f5043b531250757eef22df864..39c0d4602e8ddbf4d7c82a1c5d2a40e7024c34e4 100644
@@ -867,12 +867,14 @@ skip_ipsec2:;
                         * above, will be forwarded by the ip6_input() routine,
                         * if necessary.
                         */
+#if MROUTING
                        if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
                                if (ip6_mforward(ip6, ifp, m) != 0) {
                                        m_freem(m);
                                        goto done;
                                }
                        }
+#endif
                }
                /*
                 * Multicasts with a hoplimit of zero may be looped back,
diff --git a/bsd/netinet6/ip6_var.h b/bsd/netinet6/ip6_var.h
index a895cad31cb19c54e9774c19d64d2a304e798342..c5b5bca1753daac74e4a9af33efbde6ea182e173 100644
@@ -300,8 +300,9 @@ extern int  ip6_neighborgcthresh;   /* Threshold # of NDP entries for GC */
 extern int     ip6_maxifprefixes;      /* Max acceptable prefixes via RA per IF */
 extern int     ip6_maxifdefrouters;    /* Max acceptable def routers via RA */
 extern int     ip6_maxdynroutes;       /* Max # of routes created via redirect */
-
+#ifdef MROUTING
 extern struct socket *ip6_mrouter;     /* multicast routing daemon */
+#endif
 extern int     ip6_sendredirects;      /* send IP redirects when forwarding? */
 extern int     ip6_maxfragpackets;     /* Maximum packets in reassembly queue */
 extern int      ip6_maxfrags;          /* Maximum fragments in reassembly queue */
diff --git a/bsd/netinet6/ipsec.c b/bsd/netinet6/ipsec.c
index b65d9a5eff13e4d2e90b5839e71edb6917b3605d..6a7da3d2bb6a64999a6b5f7308d0cb7e32848996 100644
@@ -3384,6 +3384,7 @@ ipsec6_output_tunnel(
                                struct ip *ip;
                                struct sockaddr_in* dst4;
                                struct route *ro4 = NULL;
+                               struct ip_out_args ipoa = { IFSCOPE_NONE };
 
                                /*
                                 * must be last isr because encapsulated IPv6 packet
@@ -3418,14 +3419,7 @@ ipsec6_output_tunnel(
                                        dst4->sin_family = AF_INET;
                                        dst4->sin_len = sizeof(*dst4);
                                        dst4->sin_addr = ip->ip_dst;
-                                       rtalloc(ro4);
                                }
-                               if (ro4->ro_rt == NULL) {
-                                       OSAddAtomic(1, &ipstat.ips_noroute);
-                                       error = EHOSTUNREACH;
-                                       goto bad;
-                               }
-       
                                state->m = ipsec4_splithdr(state->m);
                                if (!state->m) {
                                        error = ENOMEM;
@@ -3474,8 +3468,10 @@ ipsec6_output_tunnel(
                                }
                                ip = mtod(state->m, struct ip *);
                                ip->ip_len = ntohs(ip->ip_len);  /* flip len field before calling ip_output */
-                               ip_output(state->m, NULL, ro4, 0, NULL, NULL);
+                               error = ip_output(state->m, NULL, ro4, IP_OUTARGS, NULL, &ipoa);
                                state->m = NULL;
+                               if (error != 0)
+                                       goto bad;
                                goto done;
                        } else {
                                ipseclog((LOG_ERR, "ipsec6_output_tunnel: "
@@ -4132,6 +4128,7 @@ ipsec_send_natt_keepalive(
        struct udphdr *uh;
        struct ip *ip;
        int error;
+       struct ip_out_args ipoa = { IFSCOPE_NONE };
 
        lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED);
        
@@ -4172,7 +4169,7 @@ ipsec_send_natt_keepalive(
        uh->uh_sum = 0;
        *(u_int8_t*)((char*)m_mtod(m) + sizeof(struct ip) + sizeof(struct udphdr)) = 0xFF;
        
-       error = ip_output(m, NULL, &sav->sah->sa_route, IP_NOIPSEC, NULL, NULL);
+       error = ip_output(m, NULL, &sav->sah->sa_route, IP_OUTARGS | IP_NOIPSEC, NULL, &ipoa);
        if (error == 0) {
                sav->natt_last_activity = natt_now;
                return TRUE;
diff --git a/bsd/netinet6/mld6.c b/bsd/netinet6/mld6.c
index 36e09c8b8264e3b7e6d5e2bbebd9a22e1dc4e0f8..7e9a882e87e1f7341126fdb480b5413b9340e706 100644
@@ -499,7 +499,11 @@ mld6_sendpkt(
         * Request loopback of the report if we are acting as a multicast
         * router, so that the process-level routing daemon can hear it.
         */
+#if MROUTING
        im6o.im6o_multicast_loop = (ip6_mrouter != NULL);
+#else
+       im6o.im6o_multicast_loop = 0;
+#endif
 
        /* increment output statictics */
        icmp6stat.icp6s_outhist[type]++;
diff --git a/bsd/netinet6/nd6.c b/bsd/netinet6/nd6.c
index 5f2c2abcde8df5ea07e0e79195650df8e742c671..a7b5cb3a3dc5284b69f22c87aa69449e08ec9e2f 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2008-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -1309,7 +1309,7 @@ nd6_free(
                dr = defrouter_lookup(&((struct sockaddr_in6 *)rt_key(rt))->
                    sin6_addr, rt->rt_ifp);
 
-               if (ln->ln_router || dr) {
+               if ((ln && ln->ln_router) || dr) {
                        /*
                         * rt6_flush must be called whether or not the neighbor
                         * is in the Default Router List.
@@ -2906,6 +2906,7 @@ nd6_need_cache(
 #if IFT_IEEE80211
        case IFT_IEEE80211:
 #endif
+       case IFT_BRIDGE:
        case IFT_GIF:           /* XXX need more cases? */
                return(1);
        default:
@@ -2933,6 +2934,7 @@ nd6_storelladdr(
 #if IFT_IEEE80211
                case IFT_IEEE80211:
 #endif
+               case IFT_BRIDGE:
                        ETHER_MAP_IPV6_MULTICAST(&SIN6(dst)->sin6_addr,
                                                 desten);
                        return(1);
diff --git a/bsd/netinet6/raw_ip6.c b/bsd/netinet6/raw_ip6.c
index 762258e4554d67e4d1ec39f5a64660f39820b12a..d4a32b9274a8d05acd585ec4e7556339de127741 100644
@@ -566,8 +566,10 @@ rip6_ctloutput(
                case MRT6_ADD_MFC:
                case MRT6_DEL_MFC:
                case MRT6_PIM:
+#if MROUTING
                        error = ip6_mrouter_get(so, sopt);
                        break;
+#endif
                default:
                        error = ip6_ctloutput(so, sopt);
                        break;
@@ -597,8 +599,10 @@ rip6_ctloutput(
                case MRT6_ADD_MFC:
                case MRT6_DEL_MFC:
                case MRT6_PIM:
+#if MROUTING
                        error = ip6_mrouter_set(so, sopt);
                        break;
+#endif
                default:
                        error = ip6_ctloutput(so, sopt);
                        break;
@@ -649,8 +653,10 @@ rip6_detach(struct socket *so)
        if (inp == 0)
                panic("rip6_detach");
        /* xxx: RSVP */
+#if MROUTING
        if (so == ip6_mrouter)
                ip6_mrouter_done();
+#endif
        if (inp->in6p_icmp6filt) {
                FREE(inp->in6p_icmp6filt, M_PCB);
                inp->in6p_icmp6filt = NULL;
diff --git a/bsd/nfs/nfs_socket.c b/bsd/nfs/nfs_socket.c
index e630eb95c4d092b2bd2aff938341d7c8f69ba79f..ee1a6167988c030c9415dd721746cc413182e3c2 100644
@@ -3952,10 +3952,10 @@ nfsrv_rcv_locked(socket_t so, struct nfsrv_sock *slp, int waitflag)
        if (slp->ns_sotype == SOCK_STREAM) {
                /*
                 * If there are already records on the queue, defer soreceive()
-                * to an nfsd so that there is feedback to the TCP layer that
+                * to an(other) nfsd so that there is feedback to the TCP layer that
                 * the nfs servers are heavily loaded.
                 */
-               if (slp->ns_rec && waitflag == MBUF_DONTWAIT) {
+               if (slp->ns_rec) {
                        ns_flag = SLP_NEEDQ;
                        goto dorecs;
                }
diff --git a/bsd/sys/buf_internal.h b/bsd/sys/buf_internal.h
index a11222c07fcadb1c6975075c29b1e7dfeb71219e..6d3801a50f4e835dfb8a7ecb28510e1fd3114996 100644
@@ -147,6 +147,7 @@ struct buf {
  * Parameters for buffer cache garbage collection 
  */
 #define BUF_STALE_THRESHHOLD   30      /* Collect if untouched in the last 30 seconds */
+#define BUF_MAX_GC_COUNT       1000    /* Generally 6-8 MB */
 
 /*
  * mask used by buf_flags... these are the readable external flags
diff --git a/bsd/sys/kern_memorystatus.h b/bsd/sys/kern_memorystatus.h
index 3abe336ccb2afda2c0a3a471a3027fbc2e54bbef..92f687f3ee13ee10836ac55089c06aad89fb98b0 100644
@@ -66,6 +66,10 @@ enum {
 typedef struct jetsam_priority_entry {
        pid_t pid;
        uint32_t flags;
+       int32_t hiwat_pages;
+       int32_t hiwat_reserved1;
+       int32_t hiwat_reserved2;
+       int32_t hiwat_reserved3;
 } jetsam_priority_entry_t;
 
 /*
diff --git a/bsd/sys/mount.h b/bsd/sys/mount.h
index d5a3d32721056ea669a666277a203f7d12053955..57740c51fd926cd41be7f57d7f1acdf2e08e0d0f 100644
@@ -292,6 +292,9 @@ struct vfs_attr {
  * NFS export related mount flags.
  */
 #define        MNT_EXPORTED    0x00000100      /* file system is exported */
+#ifdef PRIVATE
+#define MNT_IMGSRC     0x00000200
+#endif /* CONFIG_IMGSRC_ACCESS */
 
 /*
  * MAC labeled / "quarantined" flag
diff --git a/bsd/sys/mount_internal.h b/bsd/sys/mount_internal.h
index 6cca245d501206c7975e852fe3db167ac48fc82b..cb71406df546842f4b095aae08030dc3eb887132 100644
@@ -227,6 +227,10 @@ extern struct mount * dead_mountp;
  *             because the bits here were broken out from the high bits
  *             of the mount flags.
  */
+#ifdef CONFIG_IMGSRC_ACCESS
+#define MNTK_HAS_MOVED         0x00002000
+#define MNTK_BACKS_ROOT                0x00004000
+#endif /* CONFIG_IMGSRC_ACCESS */
 #define MNTK_AUTH_CACHE_TTL    0x00008000      /* rights cache has TTL - TTL of 0 disables cache */
 #define        MNTK_PATH_FROM_ID       0x00010000      /* mounted file system supports id-to-path lookups */
 #define        MNTK_UNMOUNT_PREFLIGHT  0x00020000      /* mounted file system wants preflight check during unmount */
diff --git a/bsd/sys/pthread_internal.h b/bsd/sys/pthread_internal.h
index dc68c04c2cffcd22f85f271fff8ab2e1acc393e9..7d0cfae29b980b97ae3066070070eb1716d8a456 100644
@@ -42,7 +42,6 @@ struct threadlist {
        TAILQ_ENTRY(threadlist) th_entry;
        thread_t th_thread;
        int      th_flags;
-       uint32_t th_suspended;
        uint16_t th_affinity_tag;
        uint8_t  th_priority;
        uint8_t  th_policy;
@@ -57,6 +56,7 @@ struct threadlist {
 #define TH_LIST_BLOCKED        0x04
 #define TH_LIST_SUSPENDED      0x08
 #define TH_LIST_BUSY           0x10
+#define TH_LIST_NEED_WAKEUP    0x20
 
 struct workitem {
        TAILQ_ENTRY(workitem) wi_entry;
diff --git a/bsd/sys/resource.h b/bsd/sys/resource.h
index 85829a914813a63af2e7aee22c826f5275da4ced..72c969c125a6bb86125ddfc739f27ba4415781a8 100644
@@ -106,6 +106,7 @@ typedef __uint64_t  rlim_t;
 
 #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
 #define        PRIO_DARWIN_THREAD      3               /* Second argument is always 0 (current thread) */
+#define        PRIO_DARWIN_PROCESS     4               /* Second argument is a PID */
 
 /*
  * Range limitations for the value of the third parameter to setpriority().
@@ -113,7 +114,8 @@ typedef __uint64_t  rlim_t;
 #define        PRIO_MIN        -20
 #define        PRIO_MAX        20
 
-/* use PRIO_DARWIN_BG to set the current thread into "background" state
+/* 
+ * use PRIO_DARWIN_BG to set the current thread into "background" state
  * which lowers CPU, disk IO, and networking priorites until thread terminates
  * or "background" state is revoked
  */
diff --git a/bsd/sys/socketvar.h b/bsd/sys/socketvar.h
index 2bd0c593efb1624cf3413f6879e631b6d75f88b1..830dc76ae7f8ac938603026df73c396da6214953 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -120,7 +120,7 @@ struct socket {
        short   so_options;             /* from socket call, see socket.h */
        short   so_linger;              /* time to linger while closing */
        short   so_state;               /* internal state flags SS_*, below */
-       caddr_t so_pcb;                 /* protocol control block */
+       void    *so_pcb;                        /* protocol control block */
        struct  protosw *so_proto;      /* protocol handle */
        /*
         * Variables for connection queueing.
diff --git a/bsd/sys/sockio.h b/bsd/sys/sockio.h
index 4a7700e0bb9ba8c798f2e9ba4591d5d2cf9c1488..ab6ca6658c9c922df6fa22e865fb9b395ecc45f3 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #define SIOCGIFBOND    _IOWR('i', 71, struct ifreq)    /* get bond if config */
 #define        SIOCIFCREATE    _IOWR('i', 120, struct ifreq)   /* create clone if */
 #define        SIOCIFDESTROY    _IOW('i', 121, struct ifreq)   /* destroy clone if */
+
+#define SIOCSDRVSPEC    _IOW('i', 123, struct ifdrv)    /* set driver-specific
+                                                                  parameters */
+#define SIOCGDRVSPEC    _IOWR('i', 123, struct ifdrv)   /* get driver-specific
+                                                                  parameters */
+#ifdef KERNEL_PRIVATE
+#define SIOCSDRVSPEC32    _IOW('i', 123, struct ifdrv32)    /* set driver-specific
+                                                                  parameters */
+#define SIOCGDRVSPEC32    _IOWR('i', 123, struct ifdrv32)   /* get driver-specific
+                                                                  parameters */
+#define SIOCSDRVSPEC64    _IOW('i', 123, struct ifdrv64)    /* set driver-specific
+                                                                  parameters */
+#define SIOCGDRVSPEC64    _IOWR('i', 123, struct ifdrv64)   /* get driver-specific
+                                                                  parameters */
+
+#endif /* KERNEL_PRIVATE */
 #define        SIOCSIFVLAN      _IOW('i', 126, struct ifreq)   /* set VLAN config */
 #define        SIOCGIFVLAN     _IOWR('i', 127, struct ifreq)   /* get VLAN config */
 #define        SIOCSETVLAN     SIOCSIFVLAN
diff --git a/bsd/sys/ubc_internal.h b/bsd/sys/ubc_internal.h
index 7c73b04ee119d01035404d76137263c42fb565dd..775a8457b775c1687f451cdf7a4611ff8262881a 100644
 
 extern struct zone     *ubc_info_zone;
 
+/* 
+ * Maximum number of vfs clusters per vnode
+ */
+#define MAX_CLUSTERS   CONFIG_MAX_CLUSTERS
 
-#define MAX_CLUSTERS 8         /* maximum number of vfs clusters per vnode */
 #define SPARSE_PUSH_LIMIT 4    /* limit on number of concurrent sparse pushes outside of the cl_lockw */
                                 /* once we reach this limit, we'll hold the lock */
 
diff --git a/bsd/sys/vnode_internal.h b/bsd/sys/vnode_internal.h
index cf8f7b455b313cd30eb7a53e0d566e652fa42284..dbff3a50dd187407d363acdbd27617052555f9a7 100644
@@ -250,6 +250,10 @@ struct vnode {
  */
 extern struct vnode *rootvnode;        /* root (i.e. "/") vnode */
 
+#ifdef CONFIG_IMGSRC_ACCESS
+extern struct vnode *imgsrc_rootvnode;
+#endif /* CONFIG_IMGSRC_ACCESS */
+
 
 /*
  * Mods for exensibility.
diff --git a/bsd/vfs/vfs_bio.c b/bsd/vfs/vfs_bio.c
index 3f4c4e59372c04ba10b0ed279e438d446f2dcfd2..6d3eba5eb44a45762078f514c6ae6e7af809349a 100644
@@ -125,7 +125,7 @@ static void buf_reassign(buf_t bp, vnode_t newvp);
 static errno_t buf_acquire_locked(buf_t bp, int flags, int slpflag, int slptimeo);
 static int     buf_iterprepare(vnode_t vp, struct buflists *, int flags);
 static void    buf_itercomplete(vnode_t vp, struct buflists *, int flags);
-static boolean_t buffer_cache_gc(void);
+boolean_t buffer_cache_gc(void);
 
 __private_extern__ int  bdwrite_internal(buf_t, int);
 
@@ -3648,12 +3648,13 @@ dump_buffer:
        return(0);
 }
 
-static boolean_t 
+boolean_t 
 buffer_cache_gc(void)
 {
        buf_t bp;
        boolean_t did_large_zfree = FALSE;
        int now = buf_timestamp();
+       uint32_t count = 0;
 
        lck_mtx_lock_spin(buf_mtxp);
 
@@ -3661,7 +3662,7 @@ buffer_cache_gc(void)
        bp = TAILQ_FIRST(&bufqueues[BQ_META]);
 
        /* Only collect buffers unused in the last N seconds. Note: ordered by timestamp. */
-       while ((bp != NULL) && ((now - bp->b_timestamp) > BUF_STALE_THRESHHOLD)) {
+       while ((bp != NULL) && ((now - bp->b_timestamp) > BUF_STALE_THRESHHOLD) && (count < BUF_MAX_GC_COUNT)) {
                int result, size;
                boolean_t is_zalloc;
 
@@ -3674,6 +3675,7 @@ buffer_cache_gc(void)
                        did_large_zfree = TRUE;
                }
                bp = TAILQ_FIRST(&bufqueues[BQ_META]);
+               count++;
        } 
 
        lck_mtx_unlock(buf_mtxp);
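
The new count variable caps how many stale metadata buffers a single pass reclaims while buf_mtxp is held (BUF_MAX_GC_COUNT itself is defined elsewhere in this commit, presumably in bsd/sys/buf_internal.h), so one collection pass can no longer monopolize the lock behind an arbitrarily long stale queue. A stand-alone sketch of the same bounded-scan pattern, with illustrative names:

#include <stdlib.h>

struct entry {
    struct entry *next;
    int           timestamp;
};

/*
 * Reclaim at most 'limit' entries older than 'stale' seconds per pass.
 * The cap bounds lock hold time, not total reclamation: callers simply
 * run the pass again to collect the rest.
 */
static unsigned
gc_pass(struct entry **head, int now, int stale, unsigned limit)
{
    unsigned count = 0;

    while (*head != NULL && (now - (*head)->timestamp) > stale &&
           count < limit) {
        struct entry *e = *head;
        *head = e->next;
        free(e);
        count++;
    }
    return count;
}
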
index 5aec1498a48a26d7480d7a8ec7c7176177659475..d436d781baa3ce22544f7b859ac49bfdf5d63b46 100644 (file)
@@ -2718,7 +2718,7 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old
                         * because IO_HEADZEROFILL and IO_TAILZEROFILL not set
                         */
                        if ((start_offset + total_size) > max_io_size)
-                               total_size -= start_offset;
+                               total_size = max_io_size - start_offset;
                        xfer_resid = total_size;
 
                        retval = cluster_copy_ubc_data_internal(vp, uio, &xfer_resid, 1, 1);
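
The one-line clamp fix above is easiest to verify with numbers (values illustrative, not from the source):

/*
 * With max_io_size = 1 MB, start_offset = 4 KB, total_size = 2 MB:
 *
 *   old: total_size -= start_offset              ->  2 MB - 4 KB
 *        start_offset + total_size               ==  2 MB  (still > 1 MB)
 *
 *   new: total_size = max_io_size - start_offset ->  1 MB - 4 KB
 *        start_offset + total_size               ==  1 MB  (clamped exactly)
 *
 * The old form merely shrank the transfer by the offset; the new form
 * pins the end of the transfer to the max_io_size window.
 */
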
@@ -5614,6 +5614,14 @@ is_file_clean(vnode_t vp, off_t filesize)
 #define DRT_HASH_SMALL_MODULUS 23
 #define DRT_HASH_LARGE_MODULUS 401
 
+/*
+ * Physical memory required before the large hash modulus is permitted.
+ *
+ * On small memory systems, the large hash modulus can lead to physical
+ * memory starvation, so we avoid using it there.
+ */
+#define DRT_HASH_LARGE_MEMORY_REQUIRED (1024LL * 1024LL * 1024LL)      /* 1GiB */
+
 #define DRT_SMALL_ALLOCATION   1024    /* 104 bytes spare */
 #define DRT_LARGE_ALLOCATION   16384   /* 344 bytes spare */
 
@@ -5756,8 +5764,12 @@ vfs_drt_alloc_map(struct vfs_drt_clustermap **cmapp)
                 * see whether we should grow to the large one.
                 */
                if (ocmap->scm_modulus == DRT_HASH_SMALL_MODULUS) {
-                       /* if the ring is nearly full */
-                       if (active_buckets > (DRT_HASH_SMALL_MODULUS - 5)) {
+                       /* 
+                        * If the ring is nearly full and we are allowed to
+                        * use the large modulus, upgrade.
+                        */
+                       if ((active_buckets > (DRT_HASH_SMALL_MODULUS - 5)) &&
+                           (max_mem >= DRT_HASH_LARGE_MEMORY_REQUIRED)) {
                                nsize = DRT_HASH_LARGE_MODULUS;
                        } else {
                                nsize = DRT_HASH_SMALL_MODULUS;
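
Read as a predicate, the upgrade decision now has two legs; a sketch under the assumptions of this hunk (max_mem is the kernel's physical-memory-size global, in bytes):

/* Grow to the large ring only when the small ring is nearly full AND
 * the machine has at least DRT_HASH_LARGE_MEMORY_REQUIRED (1 GiB) of
 * physical memory, since the large modulus is what starved small
 * systems. */
static int
drt_should_grow_hash(u_int active_buckets, uint64_t max_mem)
{
    return (active_buckets > (DRT_HASH_SMALL_MODULUS - 5)) &&
           (max_mem >= DRT_HASH_LARGE_MEMORY_REQUIRED);
}
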
index 467eb00b2a0d4efb9bf413ed33745a7bd651c846..529129d9c6b74a63cac02f08198f2ed1447f6a6e 100644 (file)
  */
 struct mount *rootfs;
 struct vnode *rootvnode;
+
+#ifdef CONFIG_IMGSRC_ACCESS
+struct vnode *imgsrc_rootvnode;
+#endif /* CONFIG_IMGSRC_ACCESS */
+
 int (*mountroot)(void) = NULL;
 
 /*
index c44732bb830ca2d977b02ee34d73992e958b2550..d78894caf7059d29a2b8e1661fcc68c4754e0b28 100644 (file)
@@ -6901,8 +6901,6 @@ errno_t rmdir_remove_orphaned_appleDouble(vnode_t vp , vfs_context_t ctx, int *
        } while (!eofflag);
        /*
         * If we've made it here all the files in the dir are ._ files.
-        * As we iterate through to delete them, we will verify that
-        * they are true AppleDouble files.
         * We can delete the files even though the node is suspended
         * because we are the owner of the file.
         */
@@ -6943,61 +6941,12 @@ errno_t rmdir_remove_orphaned_appleDouble(vnode_t vp , vfs_context_t ctx, int *
                                            (dp->d_namlen == 2 && dp->d_name[0] == '.' && dp->d_name[1] == '.'))
                                          ) {
 
-                               /*
-                                * This is a ._ file, so verify it is actually an AppleDouble
-                                * file by checking the header before we remove it.
-                                */
-                               vnode_t xvp = NULL;
-                               int did_namei = 0;
-
-                               NDINIT(&nd_temp, DELETE, USEDVP | LOCKPARENT,
-                                      UIO_SYSSPACE, CAST_USER_ADDR_T(dp->d_name), ctx);
+                               NDINIT(&nd_temp, DELETE, USEDVP, UIO_SYSSPACE, CAST_USER_ADDR_T(dp->d_name), ctx);
                                nd_temp.ni_dvp = vp;
-                               error = namei(&nd_temp);
-
-                               if (error) {
-                                       if (error == ENOENT) {
-                                               error = 0;
-                                       } else {
-                                               error = ENOTEMPTY;
-                                       }
-                                       goto out1;
-                               }
-                               did_namei = 1;
-
-                               xvp = nd_temp.ni_vp;
-
-                               error = check_appledouble_header(xvp, ctx);
-                               if (error) {
-                                       error = ENOTEMPTY;
-                                       goto out1;
-                               }
-                               
-                               /* Remove the file. */
-                               error = VNOP_REMOVE(vp, xvp, &nd_temp.ni_cnd, 0, ctx);
-                               if (error) {
-                                       if (error == ENOENT) {
-                                               error = 0;
-                                       }
-                                       goto out1;
-                               }
-
-out1:
-                               /* drop extra reference on vp from LOCKPARENT namei */
-                               vnode_put (vp);
-
-                               if (did_namei) {
-                                       nameidone(&nd_temp);
-                                       did_namei = 0;
-                               }
-                               if (xvp) {
-                                       vnode_put(xvp);
-                                       xvp = NULL;
-                               }
-                               if (error) {
+                               error = unlink1(ctx, &nd_temp, 0);
+                               if (error && error != ENOENT) {
                                        goto outsc;
                                }
-
                        }
                        cpos += dp->d_reclen;
                        dp = (struct dirent*)cpos;
index 23653799fbcec651a7849d6e7822fe97b0b0914e..24dfd95b3c0b799735d5d89954ede199b83dd214 100644 (file)
@@ -153,6 +153,17 @@ static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
 static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp,
                        user_addr_t bufp);
 static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
+
+#ifdef CONFIG_IMGSRC_ACCESS
+static int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname);
+static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx);
+static int place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx);
+static void undo_place_on_covered_vp(mount_t mp, vnode_t vp);
+static int mount_begin_update(mount_t mp, vfs_context_t ctx, int flags);
+static void mount_end_update(mount_t mp);
+static int relocate_imageboot_source(vnode_t vp, struct componentname *cnp, const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs);
+#endif /* CONFIG_IMGSRC_ACCESS */
+
 int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
 
 __private_extern__
@@ -297,6 +308,15 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int3
        if (error)
                goto out1;
        
+#ifdef CONFIG_IMGSRC_ACCESS
+       if (uap->flags == MNT_IMGSRC) {
+               error = relocate_imageboot_source(vp, &nd.ni_cnd, fstypename, ctx, is_64bit, fsmountargs);
+               vnode_put(pvp);
+               vnode_put(vp);
+               return error;
+       }
+#endif /* CONFIG_IMGSRC_ACCESS */
+
        if (uap->flags & MNT_UPDATE) {
                if ((vp->v_flag & VROOT) == 0) {
                        error = EINVAL;
@@ -323,6 +343,17 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int3
                        error = ENOTSUP;
                        goto out1;
                }
+
+#ifdef CONFIG_IMGSRC_ACCESS 
+               /* Can't downgrade the backer of the root FS */
+               if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
+                       (!vfs_isrdonly(mp)) && (uap->flags & MNT_RDONLY))
+               {
+                       error = ENOTSUP;
+                       goto out1;
+               }
+#endif /* CONFIG_IMGSRC_ACCESS */
+
                /*
                 * Only root, or the user that did the original mount is
                 * permitted to update it.
@@ -867,6 +898,368 @@ out1:
        return(error);
 }
 
+#ifdef CONFIG_IMGSRC_ACCESS
+/* 
+ * Flush in-core data, check for competing mount attempts,
+ * and set VMOUNT
+ */
+static int
+prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname)
+{
+       struct vnode_attr va;
+       int error;
+
+       /*
+        * If the user is not root, ensure that they own the directory
+        * onto which we are attempting to mount.
+        */
+       VATTR_INIT(&va);
+       VATTR_WANTED(&va, va_uid);
+       if ((error = vnode_getattr(vp, &va, ctx)) ||
+           (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
+            (!vfs_context_issuser(ctx)))) { 
+               error = EPERM;
+               goto out;
+       }
+
+       if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
+               goto out;
+
+       if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
+               goto out;
+
+       if (vp->v_type != VDIR) {
+               error = ENOTDIR;
+               goto out;
+       }
+
+       if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
+               error = EBUSY;
+               goto out;
+       }
+
+#if CONFIG_MACF
+       error = mac_mount_check_mount(ctx, vp,
+           cnp, fsname);
+       if (error != 0)
+               goto out;
+#endif
+
+       vnode_lock_spin(vp);
+       SET(vp->v_flag, VMOUNT);
+       vnode_unlock(vp);
+
+out:
+       return error;
+}
+
+static int
+authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx)
+{
+       struct nameidata nd;
+       vnode_t vp;
+       mode_t accessmode;
+       int error;
+
+       NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
+       if ( (error = namei(&nd)) )
+               return error;
+
+       strncpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
+       vp = nd.ni_vp;
+       nameidone(&nd);
+
+       if (vp->v_type != VBLK) {
+               error = ENOTBLK;
+               goto out;
+       }
+       if (major(vp->v_rdev) >= nblkdev) {
+               error = ENXIO;
+               goto out;
+       }
+       /*
+        * If mount by non-root, then verify that user has necessary
+        * permissions on the device.
+        */
+       if (!vfs_context_issuser(ctx)) {
+               accessmode = KAUTH_VNODE_READ_DATA;
+               if ((mp->mnt_flag & MNT_RDONLY) == 0)
+                       accessmode |= KAUTH_VNODE_WRITE_DATA;
+               if ((error = vnode_authorize(vp, NULL, accessmode, ctx)) != 0)
+                       goto out;
+       }
+
+       *devvpp = vp;
+out:
+       if (error) {
+               vnode_put(vp);
+       }
+
+       return error;
+}
+
+/*
+ * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
+ * and call checkdirs()
+ */
+static int
+place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx)
+{
+       int error;
+
+       mp->mnt_vnodecovered = vp; /* XXX This is normally only set at init-time ... */
+
+       vnode_lock_spin(vp);
+       CLR(vp->v_flag, VMOUNT);
+       vp->v_mountedhere = mp;
+       vnode_unlock(vp);
+
+       /*
+        * taking the name_cache_lock exclusively will
+        * ensure that everyone is out of the fast path who
+        * might be trying to use a now stale copy of
+        * vp->v_mountedhere->mnt_realrootvp
+        * bumping mount_generation causes the cached values
+        * to be invalidated
+        */
+       name_cache_lock();
+       mount_generation++;
+       name_cache_unlock();
+
+       error = vnode_ref(vp);
+       if (error != 0) {
+               goto out;
+       }
+
+       error = checkdirs(vp, ctx);
+       if (error != 0)  {
+               /* Unmount the filesystem as cdir/rdirs cannot be updated */
+               vnode_rele(vp);
+               goto out;
+       }
+
+out:
+       if (error != 0) {
+               mp->mnt_vnodecovered = NULLVP;
+       }
+       return error;
+}
+
+static void
+undo_place_on_covered_vp(mount_t mp, vnode_t vp)
+{
+       vnode_rele(vp);
+       vnode_lock_spin(vp);
+       vp->v_mountedhere = (mount_t)NULL;
+       vnode_unlock(vp);
+
+       mp->mnt_vnodecovered = NULLVP;
+}
+
+static int
+mount_begin_update(mount_t mp, vfs_context_t ctx, int flags)
+{
+       int error;
+
+       /* unmount in progress return error */
+       mount_lock_spin(mp);
+       if (mp->mnt_lflag & MNT_LUNMOUNT) {
+               mount_unlock(mp);
+               return EBUSY;
+       }
+       mount_unlock(mp);
+       lck_rw_lock_exclusive(&mp->mnt_rwlock);
+
+       /*
+        * We only allow the filesystem to be reloaded if it
+        * is currently mounted read-only.
+        */
+       if ((flags & MNT_RELOAD) &&
+                       ((mp->mnt_flag & MNT_RDONLY) == 0)) {
+               error = ENOTSUP;
+               goto out;
+       }
+
+       /*
+        * Only root, or the user that did the original mount is
+        * permitted to update it.
+        */
+       if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
+                       (!vfs_context_issuser(ctx))) { 
+               error = EPERM;
+               goto out;
+       }
+#if CONFIG_MACF
+       error = mac_mount_check_remount(ctx, mp);
+       if (error != 0) {
+               goto out;
+       }
+#endif
+
+out:
+       if (error) {
+               lck_rw_done(&mp->mnt_rwlock);
+       }
+
+       return error;
+}
+
+static void 
+mount_end_update(mount_t mp)
+{
+       lck_rw_done(&mp->mnt_rwlock);
+}
+
+static int
+relocate_imageboot_source(vnode_t vp, struct componentname *cnp, 
+               const char *fsname, vfs_context_t ctx, 
+               boolean_t is64bit, user_addr_t fsmountargs)
+{
+       int error;
+       mount_t mp;
+       boolean_t placed = FALSE;
+       vnode_t devvp;
+       struct vfstable *vfsp;
+       user_addr_t devpath;
+       char *old_mntonname;
+
+       /* If we didn't imageboot, nothing to move */
+       if (imgsrc_rootvnode == NULLVP) {
+               return EINVAL;
+       }
+
+       /* Only root can do this */
+       if (!vfs_context_issuser(ctx)) {
+               return EPERM;
+       }
+
+       error = vnode_get(imgsrc_rootvnode);
+       if (error != 0) {
+               return error;
+       }
+
+       MALLOC(old_mntonname, char*, MAXPATHLEN, M_TEMP, M_WAITOK);
+
+       /* Can only move once */
+       mp = vnode_mount(imgsrc_rootvnode);
+       if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
+               error = EBUSY;
+               goto out0;
+       }
+
+       /* Get exclusive rwlock on mount, authorize update on mp */
+       error = mount_begin_update(mp , ctx, 0);
+       if (error != 0) {
+               goto out0;
+       }
+
+       /* 
+        * It can only be moved once.  Flag is set under the rwlock,
+        * so we're now safe to proceed.
+        */
+       if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
+               goto out1;
+       }
+
+       /* Mark covered vnode as mount in progress, authorize placing mount on top */
+       error = prepare_coveredvp(vp, ctx, cnp, fsname);
+       if (error != 0) {
+               goto out1;
+       }
+       
+       /* Sanity check the name caller has provided */
+       vfsp = mp->mnt_vtable;
+       if (strncmp(vfsp->vfc_name, fsname, MFSNAMELEN) != 0) {
+               error = EINVAL;
+               goto out2;
+       }
+
+       /* Check the device vnode and update mount-from name, for local filesystems */
+       if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
+               if (is64bit) {
+                       if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
+                               goto out2;      
+                       fsmountargs += sizeof(devpath);
+               } else {
+                       user32_addr_t tmp;
+                       if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
+                               goto out2;      
+                       /* munge into LP64 addr */
+                       devpath = CAST_USER_ADDR_T(tmp);
+                       fsmountargs += sizeof(tmp);
+               }
+
+               if (devpath != USER_ADDR_NULL) {
+                       error = authorize_devpath_and_update_mntfromname(mp, devpath, &devvp, ctx);
+                       if (error) {
+                               goto out2;
+                       }
+
+                       vnode_put(devvp);
+               }
+       }
+
+       /* 
+        * Place mp on top of vnode, ref the vnode,  call checkdirs(),
+        * and increment the name cache's mount generation 
+        */
+       error = place_mount_and_checkdirs(mp, vp, ctx);
+       if (error != 0) {
+               goto out2;
+       }
+
+       placed = TRUE;
+
+       strncpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
+       strncpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
+
+       /* Forbid future moves */
+       mount_lock(mp);
+       mp->mnt_kern_flag |= MNTK_HAS_MOVED;
+       mount_unlock(mp);
+
+       /* Finally, add to mount list, completely ready to go */
+       error = mount_list_add(mp);
+       if (error != 0) {
+               goto out3;
+       }
+
+       mount_end_update(mp);
+       vnode_put(imgsrc_rootvnode);
+       FREE(old_mntonname, M_TEMP);
+
+       return 0;
+out3:
+       strncpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);
+
+       mount_lock(mp);
+       mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED);
+       mount_unlock(mp);
+
+out2:
+       /* 
+        * Placing the mp on the vnode clears VMOUNT,
+        * so cleanup is different after that point 
+        */
+       if (placed) {
+               /* Rele the vp, clear VMOUNT and v_mountedhere */
+               undo_place_on_covered_vp(mp, vp);
+       } else {
+               vnode_lock_spin(vp);
+               CLR(vp->v_flag, VMOUNT);
+               vnode_unlock(vp);
+       }
+out1:
+       mount_end_update(mp);
+
+out0:
+       vnode_put(imgsrc_rootvnode);
+       FREE(old_mntonname, M_TEMP);
+       return error;
+}
+
+#endif /* CONFIG_IMGSRC_ACCESS */
+
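
For reference, relocate_imageboot_source() is reached from the mount(2) path when uap->flags equals MNT_IMGSRC (see the __mac_mount hunk above). A hedged user-space sketch, assuming MNT_IMGSRC is visible to the caller and a local, device-backed filesystem; the fs name and paths are illustrative:

#include <sys/mount.h>

/*
 * Move the imageboot source mount onto an ordinary directory.  Root
 * only; per the code above, this fails with EINVAL if the system did
 * not imageboot and EBUSY if the mount has already been moved.
 */
int
relocate_image_source(void)
{
    /* For a local filesystem, the mount args begin with a pointer to
     * the device path, matching the copyin() logic above. */
    const char *devpath = "/dev/disk1s1";       /* illustrative */

    return mount("hfs", "/Volumes/ImageSource", MNT_IMGSRC, &devpath);
}
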
 void
 enablequotas(struct mount *mp, vfs_context_t ctx)
 {
@@ -1086,6 +1479,13 @@ safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
                goto out;
        }
 
+#ifdef CONFIG_IMGSRC_ACCESS
+       if (mp->mnt_kern_flag & MNTK_BACKS_ROOT) {
+               error = EBUSY;
+               goto out;
+       }
+#endif /* CONFIG_IMGSRC_ACCESS */
+
        return (dounmount(mp, flags, 1, ctx));
 
 out:
index c28573b0a794e5411ea879ec9d525f58ad29f9a5..9a00f1027c64ca00e18deda96c020b9af7fd0cb0 100644 (file)
@@ -221,5 +221,18 @@ do_build_all:      build_symbol_sets
 
 do_build_install:      install_symbol_sets 
 
+EXPORTS_FILE_LIST = $(addprefix $(SOURCE)/,$(foreach set,$(SYMBOL_COMPONENT_LIST), $(set).exports $(set).$(ARCH_CONFIG_LC).exports))
+EXPORTS_FILE_LIST_NOSYSTEM60 = $(addprefix $(SOURCE)/,$(foreach set, $(filter-out System6.0,$(SYMBOL_COMPONENT_LIST)), $(set).exports $(set).$(ARCH_CONFIG_LC).exports))
+
+# Does not include "whole-kernel" clients
+build_mach_kernel_exports:
+       $(_v)if [ $(SUPPORT_SYSTEM60_KEXT) -eq 1 ]; then \
+               $(SOURCE)/generate_linker_exports.sh $(OBJPATH)/kernel-kpi.exp \
+                        $(EXPORTS_FILE_LIST) || exit 1; \
+       else \
+               $(SOURCE)/generate_linker_exports.sh $(OBJPATH)/kernel-kpi.exp \
+                        $(EXPORTS_FILE_LIST_NOSYSTEM60) || exit 1; \
+       fi;
+
 include $(MakeInc_rule)
 include $(MakeInc_dir)
index 44aab8c5cc8fceb84dfe8569460cf68453a3208f..6b41eba65a100433e5352c819ea602f5d6f8c839 100644 (file)
@@ -1,4 +1,4 @@
-10.2.0
+10.3.0
 
 # The first line of this file contains the master version number for the kernel.
 # All other instances of the kernel version in xnu are derived from this file.
diff --git a/config/generate_linker_exports.sh b/config/generate_linker_exports.sh
new file mode 100755 (executable)
index 0000000..4af69e9
--- /dev/null
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+set -e
+
+if [ $# -lt 2 ]; then
+    echo "Usage: $0 output.exp input1 [input2 ... ]" 1>&2
+    exit 1
+fi
+
+OUTPUT="$1"
+shift
+
+( grep -h -v ":" "$@"; grep -h ":" "$@" | awk -F: '{print $2}' ) | sort -u > "$OUTPUT"
+
+exit 0
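
Concretely, the pipeline keeps unqualified export lines as-is and strips the architecture qualifier from the rest: a line reading _kmod_info passes through unchanged, while a hypothetical ppc:_ppc_only_symbol becomes _ppc_only_symbol; sort -u then merges both streams into a single deduplicated, sorted list in the output file.
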
index 60343290091fb97f00a2f1f49bef339df4e4169f..66c178d5fb6387174a40196963d9dba42602dd60 100644 (file)
@@ -482,7 +482,8 @@ IODMACommand::walkAll(UInt8 op)
 
        if (state->fLocalMapper)
        {
-           state->fLocalMapperPageCount = atop_64(round_page(state->fPreparedLength));
+           state->fLocalMapperPageCount = atop_64(round_page(
+                   state->fPreparedLength + ((state->fPreparedOffset + fMDSummary.fPageAlign) & page_mask)));
            state->fLocalMapperPageAlloc = fMapper->iovmAllocDMACommand(this, state->fLocalMapperPageCount);
            state->fMapContig = true;
        }
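
The extra term in the rounding matters for transfers that do not start on a page boundary; a worked example with 4 KiB pages (values illustrative):

// Old: atop_64(round_page(4096)) == 1 mapper page for a 4096-byte
// transfer, even though a transfer starting 512 bytes into a page
// actually touches two physical pages.
// New: folding the intra-page start offset into the rounding gives
// atop_64(round_page(4096 + 512)) == 2 pages, enough for the
// unaligned case.
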
index fd150a4f64290e91816765fe434a652f47b606ff..c7b4319ed3b49067ab99dc5434066203f4942b4d 100644 (file)
@@ -2950,6 +2950,17 @@ IOReturn IOService::startPowerChange (
                         &powerState, changeFlags);
 #endif
 
+    // Invalidate the last recorded tickle power state when a power transition
+    // is about to occur, and not as a result of a tickle request.
+
+    if ((getPMRequestType() != kIOPMRequestTypeActivityTickle) &&
+        (fActivityTicklePowerState != -1))
+    {
+        IOLockLock(fActivityLock);
+        fActivityTicklePowerState = -1;
+        IOLockUnlock(fActivityLock);
+    }
+
        // Initialize the change note.
 
     fHeadNoteFlags            = changeFlags;
index 4f6fc2bbe0d6bd3b96434801b887c45f9abd15d8..82cd0eeff96d9b313f324392ed2353b27e24481a 100644 (file)
--- a/kgmacros
+++ b/kgmacros
@@ -64,6 +64,7 @@ document kgm
 |     showtaskvme    Display info about the task's vm_map entries
 |     showtaskipc    Display info about the specified task's ipc space
 |     showtaskrights Display info about the task's ipc space entries
+|     showtaskrightsbt Display info about the task's ipc space entries with back traces
 |     showtaskbusyports    Display all of the task's ports with unread messages
 |
 |     showact       Display info about a thread specified by activation
@@ -213,7 +214,8 @@ document kgm
 |     showallgdbcorestacks Corefile equivalent of "showallgdbstacks"
 |     kdp-reenter      Schedule reentry into the debugger and continue.
 |     kdp-reboot       Restart remote target
-|     kdp-version       Get KDP version number
+|     kdp-version      Get KDP version number
+|     kdp-connect      "shorthand" connection macro
 |
 |     zstack           Print zalloc caller stack (zone leak debugging)
 |     findoldest       Find oldest zone leak debugging record
@@ -1136,6 +1138,10 @@ define showipcint
             if $kgm_ie.ie_bits & 0x001f0000
                 set $kgm_name = (($kgm_iindex << 8)|($kgm_ie.ie_bits >> 24))
                 showipceint $kgm_iep $kgm_name
+                if $arg2 != 0 && $kgm_ie.ie_object != 0 && ($kgm_ie.ie_bits & 0x00070000) && ((ipc_port_t) $kgm_ie.ie_object)->ip_callstack[0] != 0
+                    printf "              user bt: "
+                    showportbt $kgm_ie.ie_object $kgm_is.is_task
+                end
             end
             set $kgm_iindex = $kgm_iindex + 1
             set $kgm_iep = &($kgm_is.is_table[$kgm_iindex])
@@ -1151,7 +1157,7 @@ end
 define showipc
     set $kgm_isp = (ipc_space_t)$arg0
     showipcheader
-    showipcint $kgm_isp 0
+    showipcint $kgm_isp 0 0
 end
 document showipc
 Syntax: (gdb) showipc <ipc_space>
@@ -1161,7 +1167,7 @@ end
 define showrights
        set $kgm_isp = (ipc_space_t)$arg0
     showipcheader
-       showipcint $kgm_isp 1
+       showipcint $kgm_isp 1 0
 end
 document showrights
 Syntax: (gdb) showrights <ipc_space>
@@ -1174,7 +1180,7 @@ define showtaskipc
        showtaskheader
     showipcheader
        showtaskint $kgm_taskp
-       showipcint $kgm_taskp->itk_space 0
+       showipcint $kgm_taskp->itk_space 0 0
 end
 document showtaskipc
 Syntax: (gdb) showtaskipc <task>
@@ -1187,13 +1193,25 @@ define showtaskrights
        showtaskheader
     showipcheader
        showtaskint $kgm_taskp
-       showipcint $kgm_taskp->itk_space 1
+       showipcint $kgm_taskp->itk_space 1 0
 end
 document showtaskrights
 Syntax: (gdb) showtaskrights <task>
 | Routine to print info about the ipc rights for a task
 end
 
+define showtaskrightsbt
+       set $kgm_taskp = (task_t)$arg0
+       showtaskheader
+    showipcheader
+       showtaskint $kgm_taskp
+       showipcint $kgm_taskp->itk_space 1 1
+end
+document showtaskrightsbt
+Syntax: (gdb) showtaskrightsbt <task>
+| Routine to print info about the ipc rights for a task with backtraces
+end
+
 define showallipc
     set $kgm_head_taskp = &tasks
     set $kgm_cur_taskp = (struct task *)($kgm_head_taskp->next)
@@ -1201,7 +1219,7 @@ define showallipc
         showtaskheader
         showipcheader
         showtaskint $kgm_cur_taskp
-        showipcint $kgm_cur_taskp->itk_space 0
+        showipcint $kgm_cur_taskp->itk_space 0 0
        set $kgm_cur_taskp = (struct task *)($kgm_cur_taskp->tasks.next)
     end
 end
@@ -1218,7 +1236,7 @@ define showallrights
         showtaskheader
         showipcheader
         showtaskint $kgm_cur_taskp
-        showipcint $kgm_cur_taskp->itk_space 1
+        showipcint $kgm_cur_taskp->itk_space 1 0
        set $kgm_cur_taskp = (struct task *)($kgm_cur_taskp->tasks.next)
     end
 end
@@ -1631,6 +1649,25 @@ define showportmember
     printf "0x%08x\n", $kgm_portp->ip_messages.data.port.msgcount
 end
 
+define showportbt
+    set $kgm_iebt = ((ipc_port_t) $arg0)->ip_callstack
+    set $kgm_iepid = ((ipc_port_t) $arg0)->ip_spares[0]
+    set $kgm_procpid = ((proc_t) (((task_t) $arg1)->bsd_info))->p_pid
+    if $kgm_iebt[0] != 0
+        showptr $kgm_iebt[0]
+        set $kgm_iebt_loop_ctr = 1
+        while ($kgm_iebt_loop_ctr < 16 && $kgm_iebt[$kgm_iebt_loop_ctr])
+            printf " "
+            showptr $kgm_iebt[$kgm_iebt_loop_ctr]
+            set $kgm_iebt_loop_ctr = $kgm_iebt_loop_ctr + 1
+        end
+        if $kgm_iepid != $kgm_procpid
+            printf " (%d)", $kgm_iepid
+        end
+        printf "\n"
+    end
+end
+
 define showportint
     printf "0x%08x  ", $arg0
     set $kgm_portp = (struct ipc_port *)$arg0
@@ -2562,7 +2599,7 @@ define getdumpinfo
        dumpinfoint KDP_DUMPINFO_GETINFO
        set $kgm_dumpinfo = (kdp_dumpinfo_reply_t *) manual_pkt.data
        if $kgm_dumpinfo->type & KDP_DUMPINFO_REBOOT
-                 printf "Sysem will reboot after kernel info gets dumped.\n"
+                 printf "System will reboot after kernel info gets dumped.\n"
        else
                  printf "System will not reboot after kernel info gets dumped.\n"
        end
@@ -10182,3 +10219,17 @@ Syntax:  showallbusyports
 |Routine to print information about all receive rights on the system that
 |have enqueued messages.
 end
+
+define kdp-connect
+    if $argc > 0
+       kdp-reattach $arg0
+    else
+       printf "Attempting to attach to localhost...\n"
+       kdp-reattach localhost
+    end
+end
+
+document kdp-connect
+Syntax: (gdb) kdp-connect <address-of-remote-host>
+| Attach to the machine with the given hostname or IP address, or 'localhost' if none is given.
+end
index 9f41164919163edbcac23146f4fc6899191da70c..acc3e3e98e128f2cd1affa85e865b2fba80c2000 100644 (file)
@@ -3949,21 +3949,6 @@ OSKext::load(
     Boolean              alreadyLoaded                = false;
     OSKext             * lastLoadedKext               = NULL;
 
-    if (!sLoadEnabled) {
-        if (!isLoaded() || (!isStarted() && startOpt != kOSKextExcludeNone) ||
-            (startMatchingOpt != kOSKextExcludeNone)) {
-
-            OSKextLog(this,
-                kOSKextLogErrorLevel |
-                kOSKextLogLoadFlag,
-                "Kext loading is disabled "
-                "(attempt to load/start/start matching for kext %s).",
-                getIdentifierCString());
-        }
-        result = kOSKextReturnDisabled;
-        goto finish;
-    }
-
     if (isLoaded()) {
         alreadyLoaded = true;
         result = kOSReturnSuccess;
@@ -3976,6 +3961,16 @@ OSKext::load(
         goto loaded;
     }
 
+    if (!sLoadEnabled) {
+        OSKextLog(this,
+            kOSKextLogErrorLevel |
+            kOSKextLogLoadFlag,
+            "Kext loading is disabled (attempt to load kext %s).",
+            getIdentifierCString());
+        result = kOSKextReturnDisabled;
+        goto finish;
+    }
+
    /* If we've pushed the next available load tag to the invalid value,
     * we can't load any more kexts.
     */
@@ -4136,9 +4131,7 @@ OSKext::load(
     OSKext::saveLoadedKextPanicList();
 
 loaded:
-   /* This is a bit of a hack, because we shouldn't be handling 
-    * personalities within the load function.
-    */
+
     if (declaresExecutable() && (startOpt == kOSKextExcludeNone)) {
         result = start();
         if (result != kOSReturnSuccess) {
@@ -4152,12 +4145,32 @@ loaded:
     
    /* If not excluding matching, send the personalities to the kernel.
     * This never affects the result of the load operation.
+    * This is a bit of a hack, because we shouldn't be handling 
+    * personalities within the load function.
     */
     if (result == kOSReturnSuccess && startMatchingOpt == kOSKextExcludeNone) {
-        sendPersonalitiesToCatalog(true, personalityNames);
+        result = sendPersonalitiesToCatalog(true, personalityNames);
     }
-    
 finish:
+
+   /* More hack! If the kext doesn't declare an executable, even if we
+    * "loaded" it, we have to remove any personalities naming it, or we'll
+    * never see the registry go quiet. Errors here do not count for the
+    * load operation itself.
+    *
+    * Note that in every other regard it's perfectly ok for a kext to
+    * not declare an executable and serve only as a package for personalities
+    * naming another kext, so we do have to allow such kexts to be "loaded"
+    * so that those other personalities get added & matched.
+    */
+    if (!declaresExecutable()) {
+        OSKextLog(this,
+            kOSKextLogStepLevel | kOSKextLogLoadFlag,
+            "Kext %s has no executable; removing any personalities naming it.",
+            getIdentifierCString());
+        removePersonalitiesFromCatalog();
+    }
+
     if (result != kOSReturnSuccess) {
         OSKextLog(this,
             kOSKextLogErrorLevel |
@@ -4721,6 +4734,16 @@ OSKext::start(bool startDependenciesFlag)
         goto finish;
     }
 
+    if (!sLoadEnabled) {
+        OSKextLog(this,
+            kOSKextLogErrorLevel |
+            kOSKextLogLoadFlag,
+            "Kext loading is disabled (attempt to start kext %s).",
+            getIdentifierCString());
+        result = kOSKextReturnDisabled;
+        goto finish;
+    }
+
     result = validateKextMapping(/* start? */ true);
     if (result != kOSReturnSuccess) {
         goto finish;
@@ -7763,15 +7786,26 @@ finish:
 /*********************************************************************
 Might want to change this to a bool return?
 *********************************************************************/
-void
+OSReturn
 OSKext::sendPersonalitiesToCatalog(
     bool      startMatching,
     OSArray * personalityNames)
 {
-    OSArray      * personalitiesToSend     = NULL;  // must release
-    OSDictionary * kextPersonalities = NULL;  // do not release
+    OSReturn       result              = kOSReturnSuccess;
+    OSArray      * personalitiesToSend = NULL;  // must release
+    OSDictionary * kextPersonalities   = NULL;  // do not release
     int            count, i;
 
+    if (!sLoadEnabled) {
+        OSKextLog(this,
+            kOSKextLogErrorLevel |
+            kOSKextLogLoadFlag,
+            "Kext loading is disabled (attempt to start matching for kext %s).",
+            getIdentifierCString());
+        result = kOSKextReturnDisabled;
+        goto finish;
+    }
+
     if (sSafeBoot && !isLoadableInSafeBoot()) {
         OSKextLog(this,
             kOSKextLogErrorLevel |
@@ -7779,7 +7813,8 @@ OSKext::sendPersonalitiesToCatalog(
             "Kext %s is not loadable during safe boot; "
             "not sending personalities to the IOCatalogue.",
             getIdentifierCString());
-        return;
+        result = kOSKextReturnNotLoadable;
+        goto finish;
     }
 
     if (!personalityNames || !personalityNames->getCount()) {
@@ -7788,10 +7823,12 @@ OSKext::sendPersonalitiesToCatalog(
         kextPersonalities = OSDynamicCast(OSDictionary,
             getPropertyForHostArch(kIOKitPersonalitiesKey));
         if (!kextPersonalities || !kextPersonalities->getCount()) {
+            // not an error
             goto finish;
         }
         personalitiesToSend = OSArray::withCapacity(0);
         if (!personalitiesToSend) {
+            result = kOSKextReturnNoMemory;
             goto finish;
         }
         count = personalityNames->getCount();
@@ -7824,10 +7861,12 @@ finish:
     if (personalitiesToSend) {
         personalitiesToSend->release();
     }
-    return;
+    return result;
 }
 
 /*********************************************************************
+* xxx - We should allow removing the kext's declared personalities,
+* xxx - even with other bundle identifiers.
 *********************************************************************/
 void
 OSKext::removePersonalitiesFromCatalog(void)
index 2ca7aafb210f10b3cecadc0553acaf2d8ddc6a33..b303c36038c2f0814cd0e2c47ae80d65f00d5973 100644 (file)
@@ -67,3 +67,6 @@ options               IPSEC                   # IP security   # <ipsec>
 
 options                CONFIG_KXLD             # kxld/runtime linking of kexts # <config_kxld>
 
+# secure_kernel - secure kernel from user programs
+options     SECURE_KERNEL       # <secure_kernel> 
+
index c763d0ac4cc89129af3780f36b1cb55d6941a0e7..15f992d67095b12d51610163ad7c4e0167ba0d85 100644 (file)
@@ -68,6 +68,7 @@ libkern/stack_protector.c       standard
 libkern/kxld/kxld.c             optional config_kxld
 libkern/kxld/kxld_array.c       optional config_kxld
 libkern/kxld/kxld_copyright.c   optional config_kxld
+libkern/kxld/kxld_demangle.c    optional config_kxld
 libkern/kxld/kxld_dict.c        optional config_kxld
 libkern/kxld/kxld_kext.c        optional config_kxld
 libkern/kxld/kxld_reloc.c       optional config_kxld
index 84412e08fa3a46b759050abdc94ae6a5a081def9..9bc3566c6b523949ddf3457c501a6798f7ed12df 100644 (file)
@@ -59,7 +59,7 @@ CFLAGS=-std=c99 -Wall -Wextra -Werror -pedantic -Wformat=2 -Wcast-align \
        -isysroot $(SDKROOT)
 LDFLAGS=$(ARCHS) -dynamiclib -install_name $(LIBKXLDNAME) \
        -compatibility_version $(COMPATIBILITY_VERSION) \
-       -current_version $(CURRENT_VERSION) -isysroot $(SDKROOT)
+       -current_version $(CURRENT_VERSION) -isysroot $(SDKROOT) -lstdc++
 INCLUDES=-I$(HDRSRC) $(INCFLAGS_EXTERN)
 
 # Tools
@@ -74,9 +74,9 @@ endif
 
 # Files
 HDR_NAMES=kxld.h kxld_types.h
-OBJ_NAMES=kxld.o kxld_array.o kxld_copyright.o kxld_dict.o kxld_kext.o kxld_reloc.o \
-       kxld_sect.o kxld_seg.o kxld_sym.o kxld_state.o kxld_symtab.o kxld_util.o \
-       kxld_uuid.o kxld_vtable.o
+OBJ_NAMES=kxld.o kxld_array.o kxld_copyright.o kxld_demangle.o kxld_dict.o \
+       kxld_kext.o kxld_reloc.o kxld_sect.o kxld_seg.o kxld_sym.o kxld_state.o \
+       kxld_symtab.o kxld_util.o kxld_uuid.o kxld_vtable.o
 HDRS=$(addprefix $(HDRSRC)/, $(HDR_NAMES))
 OBJS=$(addprefix $(OBJROOT)/, $(OBJ_NAMES))
 
index b04a6045ac1b9eee216bab7f7448c04b03565394..9720f3d08967e0a190bad159a5291cfd38630416 100644 (file)
@@ -86,8 +86,8 @@ kxld_array_init(KXLDArray *array, size_t itemsize, u_int nitems)
          */
         if (array->maxitems < nitems) {
             STAILQ_FOREACH_SAFE(srcpool, &array->pools, entries, tmp) {
-                STAILQ_INSERT_TAIL(&srcpools, srcpool, entries);
                 STAILQ_REMOVE(&array->pools, srcpool, kxld_array_pool, entries);
+                STAILQ_INSERT_TAIL(&srcpools, srcpool, entries);
             }
             srcpool_capacity = array->pool_capacity;
             bzero(array, sizeof(*array));
diff --git a/libkern/kxld/kxld_demangle.c b/libkern/kxld/kxld_demangle.c
new file mode 100644 (file)
index 0000000..98ca4d5
--- /dev/null
@@ -0,0 +1,46 @@
+#if !KERNEL
+
+#include <stdlib.h>
+
+/* This demangler is part of the C++ ABI.  We don't include it directly from
+ * <cxxabi.h> so that we can avoid using C++ in the kernel linker.
+ */
+extern char * 
+__cxa_demangle(const char* __mangled_name, char* __output_buffer,
+               size_t* __length, int* __status);
+
+#endif /* !KERNEL */
+
+#include "kxld_demangle.h"
+
+/*******************************************************************************
+*******************************************************************************/
+const char *
+kxld_demangle(const char *str, char **buffer __unused, size_t *length __unused)
+{
+#if KERNEL
+    return str;
+#else
+    const char *rval = NULL;
+    char *demangled = NULL;
+    int status;
+
+    if (!str) goto finish;
+
+    rval = str;
+
+    if (!buffer || !length) goto finish;
+
+    /* Symbol names in the symbol table have an extra '_' prepended to them,
+     * so we skip the first character to make the demangler happy.
+     */
+    demangled = __cxa_demangle(str+1, *buffer, length, &status);
+    if (!demangled || status) goto finish;
+    
+    *buffer = demangled;
+    rval = demangled;
+finish:
+    return rval;
+#endif
+}
+
diff --git a/libkern/kxld/kxld_demangle.h b/libkern/kxld/kxld_demangle.h
new file mode 100644 (file)
index 0000000..1fee331
--- /dev/null
@@ -0,0 +1,24 @@
+#ifndef _KXLD_DEMANGLE_H_
+#define _KXLD_DEMANGLE_H_
+
+#include <sys/types.h>
+
+/* @function kxld_demangle
+   
+ * @abstract Demangles c++ symbols. 
+ * 
+ * @param str           The C-string to be demangled.
+ * @param buffer        A pointer to a character buffer for storing the result.
+ *                      If NULL, a buffer will be malloc'd and stored here.
+ *                      If the buffer is not large enough, it will be realloc'd.
+ *
+ * @param length        The length of the buffer.
+ * 
+ * @result              If the input string could be demangled, it returns the
+ *                      demangled string.  Otherwise, returns the input string.
+ * 
+ */
+const char * kxld_demangle(const char *str, char **buffer, size_t *length)
+    __attribute__((pure, nonnull, visibility("hidden")));
+
+#endif /* !_KXLD_DEMANGLE_H_ */
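
The calling convention documented above is the one the kxld_kext.c changes below rely on: the buffer/length pair is threaded through every call so __cxa_demangle can reuse (and realloc) a single scratch buffer, which is freed once on the way out. A minimal sketch under those assumptions (kxld_log and kxld_free are the existing helpers from kxld_util.h):

#include <stddef.h>

#include "kxld_demangle.h"
#include "kxld_util.h"

static void
log_symbol_names(const char **names, size_t count)
{
    char *demangled = NULL;        /* scratch buffer, owned here */
    size_t demangled_length = 0;
    size_t i;

    for (i = 0; i < count; ++i) {
        /* Returns the demangled form when possible, else the input. */
        kxld_log(kKxldLogLinking, kKxldLogErr, "\t%s",
            kxld_demangle(names[i], &demangled, &demangled_length));
    }

    if (demangled) kxld_free(demangled, demangled_length);
}
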
index 7b5623003090603a876d7b10bcaa0451ef41a581..a5520711e8b41d018cb7d1f530212065eb8d2d34 100644 (file)
@@ -51,6 +51,7 @@
 #define DEBUG_ASSERT_COMPONENT_NAME_STRING "kxld"
 #include <AssertMacros.h>
 
+#include "kxld_demangle.h"
 #include "kxld_dict.h"
 #include "kxld_kext.h"
 #include "kxld_reloc.h"
@@ -1096,6 +1097,10 @@ create_vtables(KXLDKext *kext)
     char class_name[KXLD_MAX_NAME_LEN];
     char vtable_name[KXLD_MAX_NAME_LEN];
     char meta_vtable_name[KXLD_MAX_NAME_LEN];
+    char *demangled_name1 = NULL;
+    char *demangled_name2 = NULL;
+    size_t demangled_length1 = 0;
+    size_t demangled_length2 = 0;
     u_int i = 0;
     u_int nvtables = 0;
 
@@ -1161,7 +1166,10 @@ create_vtables(KXLDKext *kext)
                 } else {
                     kxld_log(kKxldLogPatching, kKxldLogErr, 
                         "Warning: " kKxldLogMissingVtable, 
-                        meta_vtable_name, class_name);
+                        kxld_demangle(meta_vtable_name, &demangled_name1, 
+                            &demangled_length1), 
+                        kxld_demangle(class_name, &demangled_name2, 
+                            &demangled_length2));
                     kxld_array_resize(&kext->vtables, --nvtables);
                 }
             }
@@ -1231,6 +1239,10 @@ create_vtables(KXLDKext *kext)
     rval = KERN_SUCCESS;
 
 finish:
+
+    if (demangled_name1) kxld_free(demangled_name1, demangled_length1);
+    if (demangled_name2) kxld_free(demangled_name2, demangled_length2);
+
     return rval;
 }
 
@@ -1950,6 +1962,8 @@ resolve_symbols(KXLDKext *kext, KXLDDict *defined_symbols,
     boolean_t tests_for_weak = FALSE;
     boolean_t error = FALSE;
     boolean_t warning = FALSE;
+    char *demangled_name = NULL;
+    size_t demangled_length = 0;
 
     check(kext);
     check(defined_symbols);
@@ -1981,8 +1995,8 @@ resolve_symbols(KXLDKext *kext, KXLDDict *defined_symbols,
                     "The following symbols were defined more than once:");
             }
 
-            kxld_log(kKxldLogLinking, kKxldLogErr,
-                "\t%s: %p - %p", sym->name, 
+            kxld_log(kKxldLogLinking, kKxldLogErr, "\t%s: %p - %p", 
+                kxld_demangle(sym->name, &demangled_name, &demangled_length),
                 (void *) (uintptr_t) sym->link_addr, 
                 (void *) (uintptr_t) addr);
         }
@@ -2011,7 +2025,8 @@ resolve_symbols(KXLDKext *kext, KXLDDict *defined_symbols,
                          "The following are common symbols:");
                 }
             }
-            kxld_log(kKxldLogLinking, kKxldLogErr, "\t%s", sym->name);
+            kxld_log(kKxldLogLinking, kKxldLogErr, "\t%s", 
+                kxld_demangle(sym->name, &demangled_name, &demangled_length));
 
         } else {
 
@@ -2045,7 +2060,8 @@ resolve_symbols(KXLDKext *kext, KXLDDict *defined_symbols,
 
                 if (obsolete_symbols && kxld_dict_find(obsolete_symbols, name)) {
                     kxld_log(kKxldLogLinking, kKxldLogWarn, 
-                        "This kext uses obsolete symbol %s.", name);
+                        "This kext uses obsolete symbol %s.", 
+                        kxld_demangle(name, &demangled_name, &demangled_length));
                 }
 
             } else if (kext->link_type == KXLD_LINK_PSEUDO_KEXT) {
@@ -2058,7 +2074,8 @@ resolve_symbols(KXLDKext *kext, KXLDDict *defined_symbols,
                         "This symbol set has the following unresolved symbols:");
                     warning = TRUE;
                 }
-                kxld_log(kKxldLogLinking, kKxldLogErr, "\t%s", sym->name);
+                kxld_log(kKxldLogLinking, kKxldLogErr, "\t%s", 
+                    kxld_demangle(sym->name, &demangled_name, &demangled_length));
                 kxld_sym_delete(sym);
 
             } else if (kxld_sym_is_weak(sym)) {
@@ -2092,6 +2109,7 @@ resolve_symbols(KXLDKext *kext, KXLDDict *defined_symbols,
     rval = KERN_SUCCESS;
 
 finish:
+    if (demangled_name) kxld_free(demangled_name, demangled_length);
 
     return rval;
 }
@@ -2148,6 +2166,10 @@ patch_vtables(KXLDKext *kext, KXLDDict *patched_vtables,
     char vtable_name[KXLD_MAX_NAME_LEN];
     char super_vtable_name[KXLD_MAX_NAME_LEN];
     char final_sym_name[KXLD_MAX_NAME_LEN];
+    char *demangled_name1 = NULL;
+    char *demangled_name2 = NULL;
+    size_t demangled_length1 = 0;
+    size_t demangled_length2 = 0;
     size_t len = 0;
     u_int nvtables = 0;
     u_int npatched = 0;
@@ -2204,7 +2226,11 @@ patch_vtables(KXLDKext *kext, KXLDDict *patched_vtables,
 
                 if (failure) {
                     kxld_log(kKxldLogPatching, kKxldLogErr, 
-                        "\t%s (super vtable %s)", vtable_name, super_vtable_name);
+                        "\t'%s' (super vtable '%s')", 
+                        kxld_demangle(vtable_name, &demangled_name1, 
+                            &demangled_length1), 
+                        kxld_demangle(super_vtable_name, &demangled_name2, 
+                            &demangled_length2));
                     continue;
                 }
 
@@ -2228,8 +2254,11 @@ patch_vtables(KXLDKext *kext, KXLDDict *patched_vtables,
                 require_action(!final_sym, finish, 
                     rval=KERN_FAILURE;
                     kxld_log(kKxldLogPatching, kKxldLogErr, 
-                        "Class %s is a subclass of final class %s.",
-                        class_name, super_class_name));
+                        "Class '%s' is a subclass of final class '%s'.",
+                        kxld_demangle(class_name, &demangled_name1, 
+                            &demangled_length1), 
+                        kxld_demangle(super_class_name, &demangled_name2, 
+                            &demangled_length2)));
 
                 /* Patch the class's vtable */
                 rval = kxld_vtable_patch(vtable, super_vtable, kext->symtab,
@@ -2297,6 +2326,9 @@ patch_vtables(KXLDKext *kext, KXLDDict *patched_vtables,
 
     rval = KERN_SUCCESS;
 finish:
+    if (demangled_name1) kxld_free(demangled_name1, demangled_length1);
+    if (demangled_name2) kxld_free(demangled_name2, demangled_length2);
+
     return rval;
 }
 
@@ -2309,6 +2341,8 @@ validate_symbols(KXLDKext *kext)
     KXLDSymtabIterator iter;
     KXLDSym *sym = NULL;
     u_int error = FALSE;
+    char *demangled_name = NULL;
+    size_t demangled_length = 0;
     
     /* Check for any unresolved symbols */
     kxld_symtab_iterator_init(&iter, kext->symtab, kxld_sym_is_unresolved, FALSE);
@@ -2318,13 +2352,15 @@ validate_symbols(KXLDKext *kext)
             kxld_log(kKxldLogLinking, kKxldLogErr, 
                 "The following symbols are unresolved for this kext:");
         }
-        kxld_log(kKxldLogLinking, kKxldLogErr, "\t%s", sym->name);
+        kxld_log(kKxldLogLinking, kKxldLogErr, "\t%s", 
+            kxld_demangle(sym->name, &demangled_name, &demangled_length));
     }
     require_noerr_action(error, finish, rval=KERN_FAILURE);
 
     rval = KERN_SUCCESS;
 
 finish:
+    if (demangled_name) kxld_free(demangled_name, demangled_length);
     return rval;
 }
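
A note on the pattern every hunk above follows: the two demangle locals are scratch state for kxld_demangle() (new in this commit's kxld_demangle.h). Since its return value is passed straight to a "%s" format, it must always yield a printable name, presumably the demangled form when demangling succeeds and the original string otherwise, while lazily growing a caller-owned buffer so one allocation serves all calls. A minimal sketch of that convention; the function below and its iterator loop are illustrative stand-ins, not the kext's real call sites:

    static kern_return_t
    log_symbol_names(KXLDSymtabIterator *iter)
    {
        kern_return_t rval = KERN_FAILURE;
        KXLDSym *sym = NULL;
        char *demangled_name = NULL;    /* lazily allocated by kxld_demangle */
        size_t demangled_length = 0;    /* tracks that buffer's size         */

        while ((sym = kxld_symtab_iterator_get_next(iter))) {
            /* The same two locals are threaded through every call, so a
             * single scratch buffer is reused across the whole loop. */
            kxld_log(kKxldLogLinking, kKxldLogErr, "\t%s",
                kxld_demangle(sym->name, &demangled_name, &demangled_length));
        }
        rval = KERN_SUCCESS;

        /* Freed exactly once, on the single exit path. */
        if (demangled_name) kxld_free(demangled_name, demangled_length);
        return rval;
    }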
 
index 9c387e670b0579b8e5a28a1d10f7b1f0a419dcbc..3392b4a74a2ef205ca8ff0aa533f293fdc0c9724 100644 (file)
@@ -115,17 +115,17 @@ void kxld_log(KXLDLogSubsystem subsystem, KXLDLogLevel level,
 #define kKxldLogArchNotSupported        "The target architecture (cputype 0x%x) is not supported by kxld."
 #define kKxldLogArchNotFound            "The kext does not contain a fat slice for the target architecture."
 #define kKxldLogFiletypeNotSupported    "The Mach-O filetype 0x%x is not supported on the target architecture."
-#define kKxldLogTruncatedMachO          "The Mach-O file has been truncated.  Make sure the Mach-O header structures are correct."
+#define kKxldLogTruncatedMachO          "The Mach-O file has been truncated. Make sure the Mach-O header structures are correct."
 #define kKxldLogMalformedMachO          "The Mach-O file is malformed: "
-#define kKxldLogMalformedVTable         "The vtable %s is malformed.  Make sure your kext has been built against the correct headers."
-#define kKxldLogMissingVtable           "Cannot find the vtable %s for class %s.  This vtable symbol is required for binary compatibility, and it may have been stripped."
-#define kKxldLogParentOutOfDate         "The super class vtable %s for vtable %s is out of date.  Make sure your kext has been built against the correct headers."
+#define kKxldLogMalformedVTable         "The vtable '%s' is malformed. Make sure your kext has been built against the correct headers."
+#define kKxldLogMissingVtable           "Cannot find the vtable '%s' for class '%s'. This vtable symbol is required for binary compatibility, and it may have been stripped."
+#define kKxldLogParentOutOfDate         "The super class vtable '%s' for vtable '%s' is out of date. Make sure your kext has been built against the correct headers."
 #define kKxldLogNoKmodInfo              "The kext is missing its kmod_info structure."
 #define kKxldLogInvalidSectReloc        "Relocation entry %u from section %s,%s cannot be processed."
 #define kKxldLogInvalidExtReloc         "External relocation entry %u cannot be processed."
 #define kKxldLogInvalidIntReloc         "Internal relocation entry %u cannot be processed."
-#define kKxldLogRelocationOverflow      "A relocation entry has overflowed.  The kext may be too far from one " \
-                                        "of its dependencies.  Check your kext's load address."
+#define kKxldLogRelocationOverflow      "A relocation entry has overflowed. The kext may be too far from one " \
+                                        "of its dependencies. Check your kext's load address."
 
 /*******************************************************************************
 * Allocators 
index 78e647e6bbff0651c04afd554495b39889b81261..208c030d98921b321fa4ed8a9660538cdc06dc9a 100644 (file)
@@ -32,6 +32,7 @@
 #define DEBUG_ASSERT_COMPONENT_NAME_STRING "kxld"
 #include <AssertMacros.h>
 
+#include "kxld_demangle.h"
 #include "kxld_reloc.h"
 #include "kxld_sect.h"
 #include "kxld_state.h"
 #include "kxld_reloc.h"
 #include "kxld_sect.h"
 #include "kxld_state.h"
@@ -73,6 +74,8 @@ kxld_vtable_init_from_kernel_macho(KXLDVTable *vtable, const KXLDSym *sym,
     const KXLDRelocator *relocator)
 {
     kern_return_t rval = KERN_FAILURE;
+    char *demangled_name = NULL;
+    size_t demangled_length = 0;
 
     check(vtable);
     check(sym);
@@ -86,7 +89,8 @@ kxld_vtable_init_from_kernel_macho(KXLDVTable *vtable, const KXLDSym *sym,
     require_action(kxld_sect_get_num_relocs(sect) == 0, finish,
         rval=KERN_FAILURE;
         kxld_log(kKxldLogPatching, kKxldLogErr, 
-            kKxldLogMalformedVTable, vtable->name));
+            kKxldLogMalformedVTable,
+            kxld_demangle(vtable->name, &demangled_name, &demangled_length)));
 
     rval = init_by_entries(vtable, symtab, relocator);
     require_noerr(rval, finish);
@@ -96,8 +100,8 @@ kxld_vtable_init_from_kernel_macho(KXLDVTable *vtable, const KXLDSym *sym,
     rval = KERN_SUCCESS;
 
 finish:
-
     if (rval) kxld_vtable_deinit(vtable);
+    if (demangled_name) kxld_free(demangled_name, demangled_length);
 
     return rval;
 }
@@ -110,6 +114,8 @@ kxld_vtable_init_from_object_macho(KXLDVTable *vtable, const KXLDSym *sym,
     const KXLDRelocator *relocator)
 {
     kern_return_t rval = KERN_FAILURE;
+    char *demangled_name = NULL;
+    size_t demangled_length = 0;
 
     check(vtable);
     check(sym);
@@ -123,7 +129,8 @@ kxld_vtable_init_from_object_macho(KXLDVTable *vtable, const KXLDSym *sym,
     require_action(kxld_sect_get_num_relocs(sect) > 0, finish,
         rval=KERN_FAILURE;
         kxld_log(kKxldLogPatching, kKxldLogErr, 
-            kKxldLogMalformedVTable, vtable->name));
+            kKxldLogMalformedVTable, 
+            kxld_demangle(vtable->name, &demangled_name, &demangled_length)));
 
     rval = init_by_relocs(vtable, sym, sect, symtab, relocator);
     require_noerr(rval, finish);
@@ -131,8 +138,8 @@ kxld_vtable_init_from_object_macho(KXLDVTable *vtable, const KXLDSym *sym,
     rval = KERN_SUCCESS;
 
 finish:
-
     if (rval) kxld_vtable_deinit(vtable);
+    if (demangled_name) kxld_free(demangled_name, demangled_length);
 
     return rval;
 }
@@ -145,6 +152,8 @@ kxld_vtable_init_from_final_macho(KXLDVTable *vtable, const KXLDSym *sym,
     const KXLDRelocator *relocator, const KXLDArray *relocs)
 {
     kern_return_t rval = KERN_FAILURE;
+    char *demangled_name = NULL;
+    size_t demangled_length = 0;
 
     check(vtable);
     check(sym);
@@ -158,7 +167,8 @@ kxld_vtable_init_from_final_macho(KXLDVTable *vtable, const KXLDSym *sym,
     require_action(kxld_sect_get_num_relocs(sect) == 0, finish,
         rval=KERN_FAILURE;
         kxld_log(kKxldLogPatching, kKxldLogErr, 
-            kKxldLogMalformedVTable, vtable->name));
+            kKxldLogMalformedVTable, 
+            kxld_demangle(vtable->name, &demangled_name, &demangled_length)));
 
     rval = init_by_entries_and_relocs(vtable, sym, symtab,
         relocator, relocs);
@@ -168,6 +178,7 @@ kxld_vtable_init_from_final_macho(KXLDVTable *vtable, const KXLDSym *sym,
 
 finish:
     if (rval) kxld_vtable_deinit(vtable);
+    if (demangled_name) kxld_free(demangled_name, demangled_length);
 
     return rval;
 }
@@ -499,6 +510,8 @@ init_by_entries_and_relocs(KXLDVTable *vtable, const KXLDSym *sym,
     kxld_addr_t entry_offset = 0;
     u_int nentries = 0;
     u_int i = 0;
+    char *demangled_name1 = NULL;
+    size_t demangled_length1 = 0;
 
     check(vtable);
     check(sym);
@@ -573,7 +586,9 @@ init_by_entries_and_relocs(KXLDVTable *vtable, const KXLDSym *sym,
             require_action(reloc, finish,
                 rval=KERN_FAILURE;
                 kxld_log(kKxldLogPatching, kKxldLogErr, 
-                    kKxldLogMalformedVTable, vtable->name));
+                    kKxldLogMalformedVTable, 
+                    kxld_demangle(vtable->name, &demangled_name1, 
+                        &demangled_length1)));
         
             tmpsym = kxld_reloc_get_symbol(relocator, reloc, 
                 /* data */ NULL, symtab);
@@ -630,6 +645,12 @@ kxld_vtable_patch(KXLDVTable *vtable, const KXLDVTable *super_vtable,
     KXLDSym *sym = NULL;
     u_int symindex = 0;
     u_int i = 0;
+    char *demangled_name1 = NULL;
+    char *demangled_name2 = NULL;
+    char *demangled_name3 = NULL;
+    size_t demangled_length1 = 0;
+    size_t demangled_length2 = 0;
+    size_t demangled_length3 = 0;
 
     check(vtable);
     check(super_vtable);
@@ -637,8 +658,8 @@ kxld_vtable_patch(KXLDVTable *vtable, const KXLDVTable *super_vtable,
     require_action(!vtable->is_patched, finish, rval=KERN_SUCCESS);
     require_action(vtable->entries.nitems >= super_vtable->entries.nitems, finish,
         rval=KERN_FAILURE;
     require_action(!vtable->is_patched, finish, rval=KERN_SUCCESS);
     require_action(vtable->entries.nitems >= super_vtable->entries.nitems, finish,
         rval=KERN_FAILURE;
-        kxld_log(kKxldLogPatching, kKxldLogErr, 
-            kKxldLogMalformedVTable, vtable->name));
+        kxld_log(kKxldLogPatching, kKxldLogErr, kKxldLogMalformedVTable, 
+            kxld_demangle(vtable->name, &demangled_name1, &demangled_length1)));
 
     for (i = 0; i < super_vtable->entries.nitems; ++i) {
         child_entry = kxld_array_get_item(&vtable->entries, i);
@@ -688,7 +709,11 @@ kxld_vtable_patch(KXLDVTable *vtable, const KXLDVTable *super_vtable,
         require_action(!kxld_sym_name_is_padslot(parent_entry->patched.name),
             finish, rval=KERN_FAILURE;
             kxld_log(kKxldLogPatching, kKxldLogErr, 
-                kKxldLogParentOutOfDate, super_vtable->name, vtable->name));
+                kKxldLogParentOutOfDate, 
+                kxld_demangle(super_vtable->name, &demangled_name1, 
+                    &demangled_length1), 
+                kxld_demangle(vtable->name, &demangled_name2, 
+                    &demangled_length2)));
 
 #if KXLD_USER_OR_STRICT_PATCHING
         /* 5) If we are doing strict patching, we prevent kexts from declaring
@@ -748,8 +773,11 @@ kxld_vtable_patch(KXLDVTable *vtable, const KXLDVTable *super_vtable,
         require_noerr(rval, finish);
 
         kxld_log(kKxldLogPatching, kKxldLogDetail,
-            "In vtable %s, patching %s with %s.", 
-            vtable->name, child_entry->unpatched.sym->name, sym->name);
+            "In vtable '%s', patching '%s' with '%s'.", 
+            kxld_demangle(vtable->name, &demangled_name1, &demangled_length1),
+            kxld_demangle(child_entry->unpatched.sym->name, 
+                &demangled_name2, &demangled_length2), 
+            kxld_demangle(sym->name, &demangled_name3, &demangled_length3));
 
         kxld_sym_patch(child_entry->unpatched.sym);
         child_entry->unpatched.sym = sym;
 
         kxld_sym_patch(child_entry->unpatched.sym);
         child_entry->unpatched.sym = sym;
@@ -779,6 +807,10 @@ kxld_vtable_patch(KXLDVTable *vtable, const KXLDVTable *super_vtable,
     rval = KERN_SUCCESS;
 
 finish:
+    if (demangled_name1) kxld_free(demangled_name1, demangled_length1);
+    if (demangled_name2) kxld_free(demangled_name2, demangled_length2);
+    if (demangled_name3) kxld_free(demangled_name3, demangled_length3);
+    
     return rval;
 }
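
The patching rule that produces the "In vtable '%s', patching '%s' with '%s'." message reduces to slot-by-slot inheritance: a child vtable must be at least as large as its superclass vtable, and an unresolved child slot takes the superclass's already patched symbol. A self-contained toy model of just that rule (plain C with stand-in types, not the real KXLDVTable machinery):

    #include <stdio.h>

    struct slot { const char *name; int resolved; };

    static int
    patch_vtable(struct slot *child, unsigned child_n,
                 const struct slot *parent, unsigned parent_n)
    {
        unsigned i;

        if (child_n < parent_n)
            return -1;                    /* the kKxldLogMalformedVTable case */

        for (i = 0; i < parent_n; ++i) {
            if (!child[i].resolved) {
                printf("patching '%s' with '%s'\n",
                    child[i].name, parent[i].name);
                child[i] = parent[i];     /* adopt the parent's symbol */
            }
        }
        return 0;
    }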
 
index d8e157483531fdf2e43960cce9a5f12e48d21640..36d9127e29c393ddab5bbdb5ccc14f7ca219f02e 100644 (file)
@@ -49,7 +49,7 @@ extern "C" {
  * reading and updating of values.
  */
  
-#if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm__)
 
 /*!
  * @function OSCompareAndSwap64
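
For context, the declaration this hunk moves under the ARM guard keeps the shape documented in this header, Boolean OSCompareAndSwap64(UInt64 oldValue, UInt64 newValue, volatile UInt64 *address), returning true only when the swap was performed. A small sketch of the usual retry idiom built on it (the accumulator function is hypothetical, not part of the header):

    #include <libkern/OSAtomic.h>

    /* Lock-free 64-bit accumulator: retry whenever another CPU has changed
     * the counter between our load and our compare-and-swap. */
    static UInt64
    counter_add64(volatile UInt64 *counter, UInt64 delta)
    {
        UInt64 oldval, newval;

        do {
            oldval = *counter;
            newval = oldval + delta;
        } while (!OSCompareAndSwap64(oldval, newval, counter));

        return newval;
    }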
index 14337f3ab025386dd55e41bd2ac05d9a201e45f7..312d53993e761251522be754599d714fd252bba4 100644 (file)
@@ -399,7 +399,7 @@ private:
 
     static  void  sendAllKextPersonalitiesToCatalog(
         bool startMatching = false);
-    virtual void  sendPersonalitiesToCatalog(
+    virtual OSReturn  sendPersonalitiesToCatalog(
         bool      startMatching    = false,
         OSArray * personalityNames = NULL);
     
index e1fc062e1853cf35a00b2b837b5c4958ece649d8..86238fc35a48b39f2f39c26af44379668f16434c 100644 (file)
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
+#include <stdint.h> // For uintptr_t.
 #include <string.h>
 #include <libkern/mkext.h>
 
 #include <string.h>
 #include <libkern/mkext.h>
 
+
 #define BASE 65521L /* largest prime smaller than 65536 */
-#define NMAX 5000  
-// NMAX (was 5521) the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
+#define NMAX 5552  // the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
 
 #define DO1(buf,i)  {s1 += buf[i]; s2 += s1;}
 #define DO2(buf,i)  DO1(buf,i); DO1(buf,i+1);
@@ -45,6 +46,23 @@ mkext_adler32(uint8_t *buf, int32_t len)
     unsigned long s2 = 0; // (adler >> 16) & 0xffff;
     int k;
 
+#if defined _ARM_ARCH_6
+
+       /* align buf to 16-byte boundary */
+    while ((((uintptr_t)buf)&15)&&(len>0)) { /* not on a 16-byte boundary */
+        len--;
+        s1 += *buf++;
+        s2 += s1;
+        if (s1 >= BASE) s1 -= BASE;
+    }
+       s2 %= BASE;
+
+       if (len>=16) {
+               return adler32_vec(s1, s2, buf, len);
+       }
+
+#endif
+
     while (len > 0) {
         k = len < NMAX ? len : NMAX;
         len -= k;
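
The corrected constant is easy to re-derive. A stand-alone user-space check (ordinary C, not kernel code) that searches for the largest n satisfying the bound in the comment, 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1, the condition that keeps s2 from overflowing 32 bits between modulo reductions:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        const uint64_t BASE = 65521, LIMIT = 0xffffffffULL;
        uint64_t n = 1;

        /* 255n(n+1)/2 + (n+1)(BASE-1) is the worst-case value of s2 after
         * n bytes of 0xff input starting from s1 = s2 = BASE - 1. */
        while (255 * n * (n + 1) / 2 + (n + 1) * (BASE - 1) <= LIMIT)
            n++;

        printf("NMAX = %llu\n", (unsigned long long)(n - 1));   /* 5552 */
        return 0;
    }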
     while (len > 0) {
         k = len < NMAX ? len : NMAX;
         len -= k;
index c94fde18702ee0957c3b2fbe020d00c01b29f177..bf0d9723ac2e275dee4a669d6b521b33184c6049 100644 (file)
@@ -32,6 +32,9 @@
 
 /* @(#) $Id$ */
 
+#include <stdint.h> // For uintptr_t.
+
+
 #define ZLIB_INTERNAL
 #if KERNEL
     #include <libkern/zlib.h>
     #include "zlib.h"
 #endif /* KERNEL */
 
     #include "zlib.h"
 #endif /* KERNEL */
 
+#if defined _ARM_ARCH_6
+       extern uLong adler32_vec(uLong adler, uLong sum2, const Bytef *buf, uInt len);
+#endif
+
 #define BASE 65521UL    /* largest prime smaller than 65536 */
 #define NMAX 5552
 /* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
@@ -91,7 +98,9 @@ uLong ZEXPORT adler32(adler, buf, len)
     uInt len;
 {
     unsigned long sum2;
+#if !defined _ARM_ARCH_6
     unsigned n;
+#endif
 
     /* split Adler-32 into component sums */
     sum2 = (adler >> 16) & 0xffff;
 
     /* split Adler-32 into component sums */
     sum2 = (adler >> 16) & 0xffff;
@@ -124,6 +133,20 @@ uLong ZEXPORT adler32(adler, buf, len)
         return adler | (sum2 << 16);
     }
 
         return adler | (sum2 << 16);
     }
 
+#if defined _ARM_ARCH_6
+    /* align buf to 16-byte boundary */
+    while (((uintptr_t)buf)&15) { /* not on a 16-byte boundary */
+        len--;
+        adler += *buf++;
+        sum2 += adler;
+        if (adler >= BASE) adler -= BASE;
+        MOD4(sum2);             /* only added so many BASE's */
+    }
+
+    return adler32_vec(adler, sum2, buf, len);      // armv7 neon vectorized implementation
+
+#else   //  _ARM_ARCH_6
+
     /* do length NMAX blocks -- requires just one modulo operation */
     while (len >= NMAX) {
         len -= NMAX;
@@ -153,6 +176,8 @@ uLong ZEXPORT adler32(adler, buf, len)
 
     /* return recombined sums */
     return adler | (sum2 << 16);
 
     /* return recombined sums */
     return adler | (sum2 << 16);
+
+#endif  // _ARM_ARCH_6
 }
 
 /* ========================================================================= */
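
Both copies of the checksum now share one contract with the assembly routine: a scalar prologue consumes bytes until buf reaches a 16-byte boundary while keeping the partial sums reduced below BASE, after which adler32_vec() may assume an aligned pointer and in-range sums, and returns the recombined checksum itself. Distilled into a single sketch (the wrapper is illustrative; the real code inlines this logic as shown above):

    #include <stdint.h>

    #define BASE 65521UL

    extern unsigned long adler32_vec(unsigned long adler, unsigned long sum2,
                                     const unsigned char *buf, int len);

    unsigned long
    adler32_dispatch(unsigned long adler, unsigned long sum2,
                     const unsigned char *buf, int len)
    {
        /* Scalar prologue: walk up to a 16-byte boundary, keeping both
         * sums fully reduced so the vector code starts in range. */
        while ((((uintptr_t)buf) & 15) && len > 0) {
            adler += *buf++;
            if (adler >= BASE) adler -= BASE;
            sum2 = (sum2 + adler) % BASE;
            len--;
        }

        /* NEON body: assumes aligned buf and reduced sums; it handles
         * len == 0 internally. */
        return adler32_vec(adler, sum2, buf, len);
    }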
diff --git a/libkern/zlib/arm/adler32vec.s b/libkern/zlib/arm/adler32vec.s
new file mode 100644 (file)
index 0000000..3af072c
--- /dev/null
@@ -0,0 +1,428 @@
+#include <arm/arch.h>
+
+#define BASE 65521         /* largest prime smaller than 65536 */
+#define NMAX 5552              /* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
+
+// Note: buf should have been 16-byte aligned in the calling function.
+
+// uLong adler32_vec(unsigned int adler, unsigned int sum2, const Bytef* buf, int len) {
+//    unsigned n;
+//    while (len >= NMAX) {
+//        len -= NMAX;
+//        n = NMAX / 16;          /* NMAX is divisible by 16 */
+//        do {
+//            DO16(buf);          /* 16 sums unrolled */
+//            buf += 16;
+//        } while (--n);
+//        MOD(adler);
+//        MOD(sum2);
+//    }
+//    if (len) {                  /* avoid modulos if none remaining */
+//        while (len >= 16) {
+//            len -= 16;
+//            DO16(buf);
+//            buf += 16;
+//        }
+//        while (len--) {
+//            adler += *buf++;
+//            sum2 += adler;
+//        }
+//        MOD(adler);
+//        MOD(sum2);
+//    }
+//    return adler | (sum2 << 16);             /* return recombined sums */
+// }
+
+
+/* 
+       DO16 vectorization:
+       given initial unsigned int sum2 and adler, and a new set of 16 input bytes (x[0:15]), it can be shown that
+       sum2  += (16*adler + 16*x[0] + 15*x[1] + ... + 1*x[15]);
+       adler += (x[0] + x[1] + ... + x[15]);
+
+       therefore, the above computation can be vectorized as follows:
+       1. 16-byte aligned vector load into q2 (x[0:15])
+       2. sum2 += (adler<<4);
+       3. vmull.u8 (q9,q8),q2,d2 where d2 = (1,1,1,1...,1), (q9,q8) : 16 16-bit elements x[0:15]
+       4. vmull.u8 (q11,q10),q2,q0 where q0 = (1,2,3,4...,16), (q11,q10) : 16 16-bit elements (16:1)*x[0:15]
+       5. parallel add (with one widening to 32-bit) of (q9,q8) and (q11,q10) all the way to accumulate into adler and sum2
+
+       In this revision, whenever possible, 2 DO16 loops are combined into a DO32 loop.
+       1. 32-byte aligned vector load into q2,q14 (x[0:31])
+    2. sum2 += (adler<<5);
+    3. vmull.u8 (4 q registers),(q2,q14),d2 where d2 = (1,1,1,1...,1), (4 q registers) : 32 16-bit elements x[0:31]
+       4. vmull.u8 (4 q registers),(q2,q14),(q0,q15) where (q0,q15) = (1,...,32), (4 q regs) : 32 16-bit elements (32:1)*x[0:31]
+    5. parallel add (with one widening to 32-bit) of the pair of (4 q regs) all the way to accumulate into adler and sum2
+
+       This change improves performance by ~0.55 cycles per uncompressed byte on ARM Cortex-A8.
+
+*/
+
+/*
+       MOD implementation:
+       adler%BASE = adler - floor(adler*(1/BASE))*BASE; where (1/BASE) = 0x80078071 in Q47
+       1. vmull.u32   q2,(adler,sum2),(1/BASE)         // *(1/BASE) in Q47
+    2. vshr.u64    q2,q2,#47                                   // floor function
+    3. vpadd.u32   d4,d4,d5                                            // merge into a double word in d4
+    4. vmls.u32    (adler,sum2),d4,d3[0]        // (adler,sum2) -= floor[(adler,sum2)/BASE]*BASE
+        
+*/
+
+#if defined _ARM_ARCH_6                        // this file would be used only for armv6 or above
+
+
+       .text
+       .align 2
+       .globl _adler32_vec
+_adler32_vec:
+#if (!KERNEL_SUPPORT_NEON) || (!defined _ARM_ARCH_7)   // for armv6 or armv7 without neon support
+
+
+       #define adler                   r0
+       #define sum2                    r1
+       #define buf                             r2
+       #define len                             r3      
+       #define one_by_base             r4
+       #define base                    r5
+       #define nmax                    r6
+       #define t                               r12
+       #define vecs                    lr
+       #define x0                              r8
+       #define x1                              r10
+       #define x2                              r11
+       #define x3                              r12
+       #define zero                    r9
+
+       // this macro performs adler/sum2 update for 4 input bytes
+
+       .macro DO4
+       add             sum2, adler, lsl #2                             // sum2 += 4*adler;
+       ldr             x0,[buf]                                                // 4 bytes in 1 32-bit word
+       usada8  adler, x0, zero, adler                  // adler += sum(x0:x3)
+       ldrb    x0,[buf], #4                                    // x0
+       ldrb    x2,[buf,#-2]                                    // x2
+       ldrb    x1,[buf,#-3]                                    // x1
+       ldrb    x3,[buf,#-1]                                    // x3
+       add             sum2, x0, lsl #2                                // sum2 += 4*x0
+       add             x3, x3, x1, lsl #1                              // x3+2*x1
+       add             sum2, x2, lsl #1                                // sum2 += 2*x2
+       add             x3, x1                                                  // x3+3*x1
+       add             sum2, x3                                                // sum2 += x3+3*x1
+       .endm
+
+       // the following macro cascades 4 DO4 into a adler/sum2 update for 16 bytes
+       .macro DO16
+       DO4                                                                             // adler/sum2 update for 4 input bytes
+       DO4                                                                             // adler/sum2 update for 4 input bytes
+       DO4                                                                             // adler/sum2 update for 4 input bytes
+       DO4                                                                             // adler/sum2 update for 4 input bytes
+       .endm
+
+       // the following macro performs adler sum2 modulo BASE
+       .macro  modulo_base
+       umull   x0,x1,adler,one_by_base                 // adler/BASE in Q47
+       umull   x2,x3,sum2,one_by_base                  // sum2/BASE in Q47
+       lsr             x1, #15                                                 // x1 >> 15 = floor(adler/BASE)
+       lsr             x3, #15                                                 // x3 >> 15 = floor(sum2/BASE)
+       mla             adler, x1, base, adler                  // adler %= base;
+       mla             sum2, x3, base, sum2                    // sum2 %= base;
+       .endm
+
+       adr             t, coeffs       
+       push    {r4-r6, r8-r11, lr}
+       ldmia   t, {one_by_base, base, nmax}    // load up coefficients
+
+       subs        len, nmax                   // pre-subtract len by NMAX
+       eor                     zero, zero                                      // a dummy zero register to use usada8 instruction
+    blt         len_lessthan_NMAX           // if (len < NMAX) skip the while loop     
+
+while_lengenmax_loop:                                          // do {
+    lsr         vecs, nmax, #4              // vecs = NMAX/16;
+
+len16_loop:                                                                    // do {
+
+       DO16
+
+       subs    vecs, #1                                                // vecs--;
+       bgt                     len16_loop                                      // } while (vec>0);     
+
+       modulo_base                                                             // adler sum2 modulo BASE
+
+       subs            len, nmax                                       // len -= NMAX
+       bge                     while_lengenmax_loop            // } while (len >= NMAX);
+
+len_lessthan_NMAX:
+       adds            len, nmax                                       // post-increment len by NMAX (undo the pre-subtract)
+
+       subs            len, #16                                        // pre-decrement len by 16
+       blt                     len_lessthan_16
+
+len16_loop2:
+
+       DO16
+
+       subs            len, #16
+       bge                     len16_loop2
+
+len_lessthan_16:
+       adds            len, #16                                        // post-increment len by 16
+       beq                     len_is_zero
+
+remaining_buf:
+       ldrb            x0, [buf], #1
+       subs            len, #1
+       add                     adler, x0
+       add                     sum2, adler
+       bgt                     remaining_buf
+
+len_is_zero:
+
+       modulo_base                                                     // adler sum2 modulo BASE
+
+       add             r0, adler, sum2, lsl #16                // to return sum2<<16 | adler 
+
+       pop             {r4-r6, r8-r11, pc}
+
+       .align 2
+coeffs:
+       .long   -2146992015
+       .long   -BASE
+       .long   NMAX
+
+#else  // KERNEL_SUPPORT_NEON
+
+
+
+       #define adler   r0
+       #define sum2    r1
+       #define buf             r2
+       #define len             r3      
+       #define nmax    r4
+       #define vecs    lr                              // vecs = NMAX/16
+       #define n               r5
+
+       #define t               r12
+
+       #define sum2_coeff              q0
+       #define sum2_coeff0             d0
+       #define sum2_coeff1             d1
+       #define adler_coeff             q1
+       #define ones                    d2
+       #define x0_x15                  q2
+       #define x0_x7                   d4
+       #define x8_x15                  d5
+       #define adlersum2               d6
+       #define adler16                 d25
+
+#if defined _ARM_ARCH_7 
+
+       adr                     t, vec_table                            // address to vec_table[]
+       stmfd           sp!, {r4, r5, lr}
+
+       vld1.32         {q0-q1},[t,:128]!                       // loading up coefficients for adler/sum2 computation
+       vld1.32         {q15},[t,:128]!                         // for sum2 computation
+       ldr                     nmax, [t]                                       // NMAX
+
+       vmov            adlersum2, sum2, adler          // pack up adler/sum2 into a double register 
+
+       cmp                     len, nmax                                       // len vs NMAX
+       lsr                     vecs, nmax, #4                          // vecs = NMAX/16;
+       blt                     len_lessthan_NMAX                       // if (len < NMAX) skip the while loop          
+
+       sub                     len, nmax                                       // pre-decrement len by NMAX
+
+while_len_ge_NMAX_loop:                                        // while (len>=NMAX) {
+
+       mov                     n, vecs, lsr #1                 // n = NMAX/16; 
+
+do_loop:                                                                       // do {
+
+       vshll.u32       q12, adlersum2, #5                      // d25 = (0,32*adler) to be added into (adler,sum2)
+       vld1.32         {x0_x15},[buf,:128]!            // 16-byte input x0:x15
+       vmull.u8        q8, x0_x7, ones                         // 16-bit x0-x7
+       vld1.32         {q14}, [buf,:128]!                      // x16:x31
+       vmull.u8        q9, x8_x15, ones                        // 16-bit x8-x15
+       vadd.u32        adlersum2,adler16                       // sum2 += old adler*32;
+       vmull.u8        q12, d28, ones                          // 16-bit x16-x23
+       vmull.u8        q13, d29, ones                          // 16-bit x24-x31
+       vmull.u8        q10, d28, sum2_coeff0           // 16-bit x16*16, x17*15, ..., x23*9
+       vmull.u8        q11, d29, sum2_coeff1           // 16-bit x24*8, x25*7, ..., x31*1      
+       vadd.u16        q8, q8, q9                                      // q8 = (x0+x8):(x7+x15) 8 16-bit elements for adler
+       vmull.u8        q9, x0_x7, d30                          // 16-bit x0*32,...,x7*25
+       vmull.u8        q14, x8_x15, d31                        // 16-bit x8*24,...,x15*17
+       vadd.u16        q12, q12, q13                           // q12 = (x16+x24):(x23+x31) 8 16-bit elements for adler
+       vadd.u16        q10, q11                                        // 8 16-bit elements for sum2
+       vadd.u16        q8, q12                                         // 8 16-bit elements for adler
+       vadd.u16        q9, q14                                         // 8 16-bit elements for sum2 
+       vadd.u16        q10, q9                                         // 8 16-bit elements for sum2
+       vpaddl.u16      q8, q8                                          // 4 32-bit elements for adler
+       vpaddl.u16      q10, q10                                        // 4 32-bit elements for sum2
+       vpadd.u32       d16,d16,d17                                     // 2 32-bit elements for adler
+       vpadd.u32       d17,d20,d21                                     // 2 32-bit elements for sum2
+       subs            n, #1                                           //  --n 
+       vpadd.u32       d4,d17,d16                                      // s8 : 32-bit elements for sum2, s9 : 32-bit element for adler
+       vadd.u32        adlersum2,d4                            // update adler/sum2 with the new 16 bytes input
+
+       bgt                     do_loop                                         // } while (--n);
+
+       vshll.u32       q12, adlersum2, #4                      // d25 = (0,16*adler) to be added into (adler,sum2)
+
+       vld1.32         {x0_x15},[buf,:128]!            //      16-byte input
+
+       vmull.u8        q8, x0_x7, ones                         // 16-bit x0-x7
+       vmull.u8        q9, x8_x15, ones                        // 16-bit x8-x15
+       vmull.u8        q10, x0_x7, sum2_coeff0         // 16-bit x0*16, x1*15, ..., x7*9
+       vmull.u8        q11, x8_x15, sum2_coeff1        // 16-bit x8*8, x9*7, ..., x15*1        
+
+       vadd.u16        q8, q8, q9                                      // 8 16-bit elements for adler
+       vadd.u16        q10, q10, q11                           // 8 16-bit elements for sum2
+       vpaddl.u16      q8, q8                                          // 4 32-bit elements for adler
+       vpaddl.u16      q10, q10                                        // 4 32-bit elements for sum2
+       vpadd.u32       d16,d16,d17                                     // 2 32-bit elements for adler
+       vpadd.u32       d17,d20,d21                                     // 2 32-bit elements for sum2
+       vadd.u32        adlersum2,adler16                       // sum2 += old adler;
+       vpadd.u32       d4,d17,d16                                      // s8 : 32-bit elements for sum2, s9 : 32-bit element for adler
+       vadd.u32        adlersum2,d4                            // update adler/sum2 with the new 16 bytes input
+
+       // mod(adler,BASE); mod(sum2,BASE);
+       vmull.u32       q2,adlersum2,d3[1]                      // adler/BASE, sum2/BASE in Q47
+       vshr.u64        q2,q2,#47                                       // take the integer part
+       vpadd.u32       d4,d4,d5                                        // merge into a double word in d4
+       vmls.u32        adlersum2,d4,d3[0]                      // (adler,sum2) -= floor[(adler,sum2)/BASE]*BASE
+
+       subs            len, nmax                                       // len -= NMAX;
+       bge                     while_len_ge_NMAX_loop          // repeat while len >= NMAX
+
+       add                     len, nmax                                       // post-increment len by NMAX
+
+len_lessthan_NMAX:
+
+       cmp                     len, #0
+       beq                     len_is_zero                                     // if len==0, branch to skip the following
+
+
+       subs            len, #32                                        // pre-decrement len by 32
+       blt                     len_lessthan_32                         // if len < 32, branch to len16_loop 
+
+len32_loop:
+
+       vshll.u32       q12, adlersum2, #5                      // d25 = (0,32*adler) to be added into (adler,sum2)
+       vld1.32         {x0_x15},[buf,:128]!            // 16-byte input x0:x15
+       vmull.u8        q8, x0_x7, ones                         // 16-bit x0-x7
+       vld1.32         {q14}, [buf,:128]!                      // x16:x31
+       vmull.u8        q9, x8_x15, ones                        // 16-bit x8-x15
+       vadd.u32        adlersum2,adler16                       // sum2 += old adler*32;
+       vmull.u8        q12, d28, ones                          // 16-bit x16-x23
+       vmull.u8        q13, d29, ones                          // 16-bit x24-x31
+       vmull.u8        q10, d28, sum2_coeff0           // 16-bit x16*16, x17*15, ..., x23*9
+       vmull.u8        q11, d29, sum2_coeff1           // 16-bit x24*8, x25*7, ..., x31*1      
+       vadd.u16        q8, q8, q9                                      // q8 = (x0+x8):(x7+x15) 8 16-bit elements for adler
+       vmull.u8        q9, x0_x7, d30                          // 16-bit x0*32,...,x7*25
+       vmull.u8        q14, x8_x15, d31                        // 16-bit x8*24,...,x15*17
+       vadd.u16        q12, q12, q13                           // q12 = (x16+x24):(x23+x31) 8 16-bit elements for adler
+       vadd.u16        q10, q11                                        // 8 16-bit elements for sum2
+       vadd.u16        q8, q12                                         // 8 16-bit elements for adler
+       vadd.u16        q9, q14                                         // 8 16-bit elements for sum2 
+       vadd.u16        q10, q9                                         // 8 16-bit elements for sum2
+       vpaddl.u16      q8, q8                                          // 4 32-bit elements for adler
+       vpaddl.u16      q10, q10                                        // 4 32-bit elements for sum2
+       vpadd.u32       d16,d16,d17                                     // 2 32-bit elements for adler
+       vpadd.u32       d17,d20,d21                                     // 2 32-bit elements for sum2
+       subs            len, #32                                        // len -= 32; 
+       vpadd.u32       d4,d17,d16                                      // s8 : 32-bit elements for sum2, s9 : 32-bit element for adler
+       vadd.u32        adlersum2,d4                            // update adler/sum2 with the new 16 bytes input
+
+       bge                     len32_loop
+
+len_lessthan_32:
+
+       adds            len, #(32-16)                           // post-increment len by 32, then pre-decrement by 16
+       blt                     len_lessthan_16                         // if len < 16, branch to len_lessthan_16
+
+       vshll.u32       q12, adlersum2, #4                      // d25 = (0,16*adler) to be added into (adler,sum2)
+
+       vld1.32         {x0_x15},[buf,:128]!            //      16-byte input
+
+
+       vmull.u8        q8, x0_x7, ones                         // 16-bit x0-x7
+       vmull.u8        q9, x8_x15, ones                        // 16-bit x8-x15
+       vmull.u8        q10, x0_x7, sum2_coeff0         // 16-bit x0*16, x1*15, ..., x7*9
+       vmull.u8        q11, x8_x15, sum2_coeff1        // 16-bit x8*8, x9*7, ..., x15*1        
+
+       vadd.u16        q8, q8, q9                                      // 8 16-bit elements for adler
+       vadd.u16        q10, q10, q11                           // 8 16-bit elements for sum2
+       vpaddl.u16      q8, q8                                          // 4 32-bit elements for adler
+       vpaddl.u16      q10, q10                                        // 4 32-bit elements for sum2
+       vpadd.u32       d16,d16,d17                                     // 2 32-bit elements for adler
+       vpadd.u32       d17,d20,d21                                     // 2 32-bit elements for sum2
+       subs            len, #16                                        // decrement len by 16
+       vadd.u32        adlersum2,adler16                       // sum2 += old adler;
+       vpadd.u32       d4,d17,d16                                      // s8 : 32-bit elements for sum2, s9 : 32-bit element for adler
+       vadd.u32        adlersum2,d4                            // update adler/sum2 with the new 16 bytes input
+
+len_lessthan_16:
+       adds            len, #16                                        // post-increment len by 16
+       beq                     len_is_zero_internal            // if len==0, branch to len_is_zero_internal
+
+       // restore adler/sum2 into general registers for remaining (<16) bytes
+
+       vmov            sum2, adler, adlersum2
+remaining_len_loop:
+       ldrb            t, [buf], #1                            // *buf++;
+       subs            len, #1                                         // len--;
+       add                     adler,t                                         // adler += *buf
+       add                     sum2,adler                                      // sum2 += adler
+       bgt                     remaining_len_loop                      // break if len<=0
+
+       vmov            adlersum2, sum2, adler          // move to double register for modulo operation
+
+len_is_zero_internal:
+
+       // mod(adler,BASE); mod(sum2,BASE);
+
+       vmull.u32       q2,adlersum2,d3[1]                      // adler/BASE, sum2/BASE in Q47
+       vshr.u64        q2,q2,#47                                       // take the integer part
+       vpadd.u32       d4,d4,d5                                        // merge into a double word in d4
+       vmls.u32        adlersum2,d4,d3[0]                      // (adler,sum2) -= floor[(adler,sum2)/BASE]*BASE
+
+len_is_zero:
+
+       vmov        sum2, adler, adlersum2              // restore adler/sum2 from (s12=sum2, s13=adler)
+       add                     r0, adler, sum2, lsl #16        // to return adler | (sum2 << 16);
+       ldmfd       sp!, {r4, r5, pc}                   // restore registers and return 
+
+
+       // constants to be loaded into q registers
+       .align  4               // 16 byte aligned
+
+vec_table:
+
+       // coefficients for computing sum2
+       .long   0x0d0e0f10              // s0
+       .long   0x090a0b0c              // s1
+       .long   0x05060708              // s2
+       .long   0x01020304              // s3
+
+       // coefficients for computing adler
+       .long   0x01010101              // s4/d2
+       .long   0x01010101              // s5
+
+       .long   BASE                    // s6 : BASE 
+       .long   0x80078071              // s7 : 1/BASE in Q47
+
+       // q15 : d30.d31
+       .long   0x1d1e1f20              // s0
+       .long   0x191a1b1c              // s1
+       .long   0x15161718              // s2
+       .long   0x11121314              // s3
+
+NMAX_loc:
+       .long   NMAX                    // NMAX
+       
+#endif         // _ARM_ARCH_7
+
+#endif         //  (!KERNEL_SUPPORT_NEON) || (!defined _ARM_ARCH_7)
+
+#endif         // _ARM_ARCH_6
+
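
Both tricks this file documents can be cross-checked in ordinary user-space C. The first loop below confirms the DO16 identity from the comment block (sum2 gains 16*adler plus the weighted bytes 16*x[0] + ... + 1*x[15], while adler gains the plain byte sum); the last comparison confirms that multiplying by 0x80078071, which is ceil(2^47/65521) in Q47 fixed point, and shifting right by 47 reproduces the integer quotient for 32-bit inputs, which is how the MOD macro avoids a hardware divide:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint8_t x[16];
        uint32_t i, adler = 1, sum2 = 0;

        for (i = 0; i < 16; i++) x[i] = (uint8_t)(i * 37 + 11); /* arbitrary */

        /* Scalar reference: 16 chained DO1 steps. */
        uint32_t ra = adler, rs = sum2;
        for (i = 0; i < 16; i++) { ra += x[i]; rs += ra; }

        /* Vectorizable form stated in the DO16 comment. */
        uint32_t va = adler, vs = sum2 + 16 * adler;
        for (i = 0; i < 16; i++) { va += x[i]; vs += (16 - i) * x[i]; }

        printf("DO16 identity holds: %s\n",
            (va == ra && vs == rs) ? "yes" : "no");

        /* Q47 reciprocal: floor(v/65521) == (v * 0x80078071) >> 47. */
        uint64_t v = 4000000000ULL;
        printf("quotients match: %s\n",
            (((v * 0x80078071ULL) >> 47) == v / 65521) ? "yes" : "no");
        return 0;
    }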
diff --git a/libkern/zlib/arm/inffastS.s b/libkern/zlib/arm/inffastS.s
new file mode 100644 (file)
index 0000000..9885579
--- /dev/null
@@ -0,0 +1,571 @@
+#include <arm/arch.h>
+
+// the following assembly code is hard-wired to POSTINC not being defined:
+
+#if 0                  // #ifdef POSTINC
+#  define OFF 0
+#  define PUP(a) *(a)++
+#else
+#  define OFF 1
+#  define PUP(a) *++(a)
+#endif
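+
+// Illustration (not in the original source): with POSTINC left undefined, OFF is 1
+// and PUP(a) is *++(a), so the C refill idiom this file transcribes reads as
+//     hold += (unsigned long)(PUP(in)) << bits;  bits += 8;
+// i.e. 'in' always points one byte before the next unread byte, which is why the
+// ldrh shortcuts below load from [in,#1] and then advance in by 2.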
+
+// this code uses r9 and therefore does not meet the register-usage protocol for armv5 and below;
+// it can only be used on armv6 and above
+
+#if defined _ARM_ARCH_6 
+
+       .cstring
+       .align 2
+LC0:
+       .ascii "invalid distance too far back\0"
+       .align 2
+LC1:
+       .ascii "invalid distance code\0"
+       .align 2
+LC2:
+       .ascii "invalid literal/length code\0"
+
+       // renaming the register and stack memory use
+
+       #define         out                     r0
+       #define         strm            r10
+       #define         state           r5
+       #define         in                      r11
+       #define         write           r9
+       #define         distcode        r8
+       #define         bits            lr
+       #define         hold            r4
+
+       // stack memory allocation
+
+       #define         window_loc      [sp,#0]
+       #define         last_loc        [sp,#4]
+       #define         beg_loc         [sp,#8]
+       #define         end_loc         [sp,#12]
+       #define         wsize_loc       [sp,#16]
+       #define         whave_loc       [sp,#20]
+       #define         windowm1_loc    [sp,#28]
+       #define         lmask_loc       [sp,#32]
+       #define         dmask_loc       [sp,#36]
+       #define         dist_loc        [sp,#48]
+
+       #define         local_size      52
+
+       // the following defines the variable offset in the inflate_state structure     (in inflate.h)
+
+       #define         state_mode              [state, #0]
+       #define         state_last              [state, #4]
+       #define         state_wrap              [state, #8]
+       #define         state_havedict  [state, #12]
+       #define         state_flags             [state, #16]
+       #define         state_dmax              [state, #20]
+       #define         state_wbits             [state, #36]
+       #define         state_wsize             [state, #40]
+       #define         state_whave             [state, #44]
+       #define         state_write             [state, #48]
+       #define         state_window    [state, #52]
+       #define         state_hold              [state, #56]
+       #define         state_bits              [state, #60]
+       #define         state_lencode   [state, #76]
+       #define         state_distcode  [state, #80]
+       #define         state_lenbits   [state, #84]
+       #define         state_distbits  [state, #88]
+
+
+// void inflate_fast(z_streamp strm, unsigned start)
+// input :     
+//                     r0 = strm, (move to r10) 
+//                     r1 = start      
+
+       .text
+       .align 2
+       .globl _inflate_fast
+_inflate_fast:
+
+       stmfd   sp!, {r4-r6,r8-r11,lr}
+       sub             sp, sp, #local_size
+
+#if defined(_ARM_ARCH_5)
+       ldrd    r2,r3,[r0, #0]                  // r2 = strm->next_in, r3 = strm->avail_in
+#else
+       ldmia   r0, {r2-r3}
+#endif
+
+       sub             in, r2, #OFF                    // in = strm->next_in - OFF; 
+       sub             r2, #(OFF+5)                    // next_in -= (OFF+5);
+       ldr             state, [r0, #28]                // state = (struct inflate_state FAR *)strm->state;
+       add             r3, r3, r2                              // last = next_in - OFF + (avail_in - 5);       next_in already updated
+       mov             strm, r0
+       str             r3, last_loc                    // store last to release r3
+
+       ldr             r3, [r0, #12]                   // next_out
+       ldr             r2, [strm, #16]                 // avail_out
+
+       sub             out, r3, #OFF                   // out = strm->next_out - OFF; r0 is used as out from this point on
+
+       sub             r3, r3, #256                    // next_out - 256
+       rsb             r1, r2, r1                              // start - avail_out
+       sub             r3, r3, #(1+OFF)                // next_out-OFF-257 
+       add             r3, r3, r2                              // r3 = end = avail_out + (next_out-OFF) - 257 = avail_out + out - 257
+       rsb             r2, r1, out                             // r2 = beg = out - (start - avail_out);
+#if defined(_ARM_ARCH_5)
+       strd    r2,r3, beg_loc                  // store beg/end
+       ldrd    r2,r3, state_wsize              // wsize/whave
+       strd    r2,r3, wsize_loc                // store wsize/whave
+       //ldrd  r6,hold, state_window   // window/hold, hold use r7
+       ldr             r6, state_window                // state->window
+       ldr             hold, state_hold                // state->hold
+       nop
+#else
+       // for architecture < armv5, ldrd/strd is not available
+       str             r2, beg_loc                             // store beg
+       str             r3, end_loc                             // store end
+       ldr             r2, state_wsize                 // state->wsize
+       ldr             r3, state_whave                 // state->whave
+       str             r2, wsize_loc                   // store wsize
+       str             r3, whave_loc                   // store whave
+       ldr             r6, state_window                // state->window
+       ldr             hold, state_hold                // state->hold
+#endif
+
+       ldr             ip, state_lencode               // lencode
+       mov             r3, #1                                  // used to derive lmask and dmask
+       ldr             write, state_write              // write (r9 from this point on) : window write index
+       nop
+       str             ip, [sp, #40]                   // save lencode
+       sub             ip, r6, #1                              // window-1
+       str             r6, window_loc                  // store window
+       str             ip, windowm1_loc                // store window-1
+       ldr             r2, state_lenbits               // lenbits
+       ldr             bits, state_bits                // bits, use lr from this point on
+       ldr             distcode, state_distcode// distcode, use r8
+       mov             r2, r3, asl r2                  // (1<<lensbits)
+       ldr             r12, state_distbits             // distbits
+       sub             r2, r2, #1                              // lmask = (1U << state->lenbits) - 1;
+       mov             r3, r3, asl r12                 // (1U << state->distbits)
+       sub             r3, r3, #1                              // dmask = (1U << state->distbits) - 1;
+
+#if defined(_ARM_ARCH_5)
+       strd    r2, r3, lmask_loc               // store lmask/dmask
+#else
+       str             r2, lmask_loc                   // lmask
+       str             r3, dmask_loc                   // dmask
+#endif
+
+       // start the do loop decoding literals and length/distances 
+       // until end-of-block or not enough input data or output space
+
+do_loop:
+       cmp             bits, #15                               // bits vs 15
+       ldr             r1, lmask_loc                   // lmask
+       bge             bitsge15                                // if bits >= 15, skip loading new 16 bits      
+
+       // this is a shortcut that relies on the processor reading data in little-endian mode
+       ldrh    r3, [in,#1]                                     // read 2 bytes 
+       add             in, #2                                          // in pointer += 2
+       add             hold, hold, r3, asl bits        // deposit the new 2 bytes into hold
+       add             bits, #16                                       // bits count += 16
+
+bitsge15:
+       ldr             ip, [sp, #40]                   // restore lencode
+       and             r3, hold, r1                            // r3 = hold & lmask
+       b               dolen
+
+op_not_zero:
+
+       tst     r2, #16                                                 // if (op&16)
+       bne     length_base                                             //              branch to length_base
+
+       tst     r2, #64                                                 // else if (op&64) 
+       bne     end_of_block                                    //              branch to end_of_block processing 
+
+       // 2nd-level length code, this is the part where if ((op & 64) == 0) { ... }
+
+       // this.val + (hold & ((1U << op) - 1)); 
+       // r3 = r1 + hold & ((1<<r2)-1);
+
+       rsb             r12, r2, #32                            // r12 = (32-op)
+       ror     r3, hold, r2                            // rotate the op least significant bits of hold to MSB
+       add             r3, r1, r3, lsr r12                     // r3 = r1 + (op LSBs in hold) = r1 + hold & ((1<<r2)-1); 
+
+       ldr             ip, [sp, #40]                   // restore lencode
+
+dolen:
+
+       // code -> 8-bit code, 8-bit bits, 16-bit val
+       ldrb    r2, [ip,r3,asl #2]              // op = (unsigned)(this.bits);
+       add             r3, ip, r3, asl #2              // r3 = this
+       ldrb    ip, [r3, #1]                            // ip = this.bits
+       ldrh    r1, [r3, #2]                            // r1 = this.value
+       cmp             r2, #0                                          // op == 0 ?
+
+       mov             hold, hold, lsr ip                      // hold >>= this.bits
+       rsb             bits, ip, bits                          // bits -= this.bits
+       bne             op_not_zero                                     // branch to op_not_zero if this.op != 0
+
+       strb    r1, [out, #1]!                          // PUP(out) = (unsigned char)(this.val);
+
+do_loop_while:
+       ldr             r1, last_loc                            // last
+       ldr             r2, end_loc                                     // end
+       cmp             in, r1                                          // compare in vs last 
+       cmpcc   out, r2                                         // if in < last, compare out vs end
+       bcc             do_loop                                         // if (in < last && out < end) go back to do_loop
+
+update_state_and_return:
+
+       sub             r2, in, bits, lsr #3            // r2 = in - (bits>>3)
+
+       add             r3, r2, #OFF                            // r3 = (in - (bits>>3)) + OFF
+       str             r3, [strm, #0]                          // strm->next_in = in + OFF;
+
+       add             r3, out, #OFF                           // out + OFF
+       str             r3, [strm, #12]                         // strm->next_out = out + OFF;
+
+       ldr             r3, last_loc                            // r3 = last
+       ldr             ip, end_loc                                     // ip = end
+
+       cmp             r3, r2                                          // compare last vs in
+       addhi   r3, r3, #5                                      // if last > in, last +=5
+       movls   r6, r3                                          // o.w., r6 = last
+       rsbls   r3, r6, r2                                      //       r3 = in-last
+       rsbhi   r3, r2, r3                                      // r3 = (last+5) - in
+       rsbls   r3, r3, #5                                      // r3 = 5 - (in-last);
+       cmp             out, ip                                         // compare out vs end
+       str             r3, [strm, #4]                          // strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
+       movcs   r2, ip                                          // if out>=end, r2=end
+       addcc   r3, ip, #256                            // if out<end, r3 = end+256
+       rsbcs   r3, r2, out                                     // if out>=end, r3 = out-end
+       addcc   r3, r3, #1                                      // if out<end, r3 = end+257
+       rsbcs   r3, r3, #256                            // if out>=end, r3 = 256-(out-end) = 256 + (end-out)
+       and             bits, #7                                        // this is equivalent to bits -= (bits>>3) << 3;
+       rsbcc   r3, out, r3                                     // if out<end, r3 = 257+end-out
+       addcs   r3, r3, #1                                      // if out>=end, r3 = 257 + (end-out)
+       str             r3, [strm, #16]                         // strm->avail_out = (unsigned)(out < end ?  257 + (end - out) : 257 - (out - end)); 
+
+       // hold &= (1U << bits) - 1;
+
+       rsb             ip, bits, #32                           // 32-bits
+    ror        hold, hold, bits                        // this is equivalent to hold<<(32-bits)
+    lsr        hold, hold, ip                          // logical shift right by (32-bits), hold now only keeps the bits LSBs
+
+       str             bits, state_bits                        // state->bits = bits;
+       str             hold, state_hold                        // state->hold = hold;
+
+       add             sp, #local_size                         // pop out stack memory
+       ldmfd   sp!,{r4-r6,r8-r11,pc}                           // restore registers and return
+
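+       // update_state_and_return mirrors the tail of inflate_fast() in inffast.c;
+       // with OFF == 1 as used above, the C being implemented is:
+       //
+       //      /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
+       //      len = bits >> 3;
+       //      in -= len;  bits -= len << 3;  hold &= (1U << bits) - 1;
+       //      strm->next_in = in + OFF;
+       //      strm->next_out = out + OFF;
+       //      strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
+       //      strm->avail_out = (unsigned)(out < end ? 257 + (end - out) : 257 - (out - end));
+       //      state->hold = hold;  state->bits = bits;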
+length_base:                                                   // r2=op, r1=lmask
+       ands    r2, r2, #15                                     // op&=15;
+       mov             r6, r1                                          // len = (unsigned) this.val;
+       beq             op_is_zero                                      // if op==0, branch to op_is_zero
+       cmp             r2, bits                                        // op vs bits
+       ldrhib  r3, [in, #1]!                           // if (op>bits) r3 = (PUP(in));
+       addhi   hold, hold, r3, asl bits        // if (op>bits) hold += (unsigned long)(PUP(in)) << bits;
+
+       rsb             ip, r2, #32                                     // 32-op
+    ror        r3, hold, r2                            // (hold<<(32-op))
+       add             r6, r1, r3, lsr ip                      // len += (unsigned)hold & ((1U << op) - 1);
+
+       addhi   bits, bits, #8                          // if (op>bits) bits += 8;
+
+       mov             hold, hold, lsr r2                      // hold >>= op;
+       rsb             bits, r2, bits                          // bits -= op;
+
+op_is_zero:
+       cmp             bits, #14
+       cmp             bits, #14
+       ldrh    r3,[in,#1]                  // speculative 16-bit load of the next 2 input bytes (2x PUP(in)); unconditional for better performance
+    addls   in, #2                      // if (bits < 15) { in += 2;
+    addls   hold, hold, r3, asl bits    //     hold += (unsigned long)(2-byte PUP(in)) << bits;
+    addls   bits, #16                   //     bits += 16; }
+
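+       // The unconditional ldrh plus three addls above fuse the branchy two-byte
+       // refill from inffast.c into one 16-bit load; the C equivalent (PUP(a) is *++(a)):
+       //
+       //      if (bits < 15) {
+       //              hold += (unsigned long)(PUP(in)) << bits;  bits += 8;
+       //              hold += (unsigned long)(PUP(in)) << bits;  bits += 8;
+       //      }
+       //      this = dcode[hold & dmask];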
+dodist:
+
+       ldr             r2, dmask_loc                           // r2 = dmask
+       and             r3, hold, r2                            // r3 = hold & dmask
+       mov             r2, r3, asl #2
+       add             r3, r2, distcode                        // &dcode[hold&dmask];
+       ldrb    ip, [r2, distcode]                      // op
+       ldrh    r1, [r3, #2]                            // dist = (unsigned)(this.val);
+       tst             ip, #16                                         // op vs 16
+       ldrb    r3, [r3, #1]                            // this.bits
+       mov             hold, hold, lsr r3                      // hold >>= this.bits;
+       rsb             bits, r3, bits                          // bits -= this.bits;
+       bne             distance_base                           // if (op&16) { distance base processing  }     
+       tst             ip, #64                                         // 
+       beq             second_distance_code            // else if ((op&64)==0) branch to 2nd level distance code
+
+       b               invalide_distance_code
+
+check_2nd_level_distance_code:
+
+       tst             r2, #64                                         // check for else if ((op & 64) == 0) for 2nd level distance code
+       bne             invalide_distance_code
+
+second_distance_code:
+
+       rsb             r2, ip, #32                                     // 32-op
+       ror             r3, hold, ip                            // hold<<(32-op)
+       add             r3, r1, r3, lsr r2                      // this.val + (hold & ((1U << op) - 1))
+
+       mov             r2, r3, asl #2
+       add             r3, r2, distcode                        // this = dcode[this.val + (hold & ((1U << op) - 1))];
+       ldrb    r2, [r2, distcode]                      // this.op
+       ldrh    r1, [r3, #2]                            // this.val
+
+       tst             r2, #16                                         // op&16
+       ldrb    r3, [r3, #1]                            // this.bits
+       mov             ip, r2                                          // op
+       mov             hold, hold, lsr r3                      // hold >> = this.bits
+       rsb             bits, r3, bits                          // bits -= this.bits
+       beq             check_2nd_level_distance_code
+
+distance_base:                 // this is invoked from if ((op&16)!=0)
+
+       and             r2, ip, #15                                     // op &= 15;
+       cmp             r2, bits                                        // op vs bits
+       ldrhib  r3, [in, #1]!                           // if (op > bits) (PUP(in))
+       addhi   hold, hold, r3, asl bits        //              hold += (unsigned long)(PUP(in)) << bits;
+       addhi   bits, bits, #8                          //              bits += 8;      
+       cmphi   r2, bits                                        //              internal second (bits < op) check
+       ldrhib  r3, [in, #1]!                           //              if (op > bits) (PUP(in))
+       addhi   hold, hold, r3, asl bits        //                      hold += (unsigned long)(PUP(in)) << bits;
+
+       rsb             ip, r2, #32                                     // (32-op)
+       ror             r3, hold, r2                            // hold<<(32-op)
+       add             r3, r1, r3, lsr ip                      // dist += (unsigned)hold & ((1U << op) - 1);
+
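+       // distance_base up to here tracks this fragment of inffast.c, where up to two
+       // more input bytes may be needed before the distance extra bits can be read:
+       //
+       //      if (op > bits) {
+       //              hold += (unsigned long)(PUP(in)) << bits;  bits += 8;
+       //              if (op > bits) {
+       //                      hold += (unsigned long)(PUP(in)) << bits;  bits += 8;
+       //              }
+       //      }
+       //      dist += (unsigned)hold & ((1U << op) - 1);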
+       ldr             ip, beg_loc                                     // beg
+
+#ifdef INFLATE_STRICT
+       ldr     r1, state_dmax                          // r1 = dmax
+#endif
+
+       str             r3, dist_loc                            // save dist
+
+#ifdef INFLATE_STRICT
+       cmp             r3, r1                                                          // dist vs dmax 
+       bgt             invalid_distance_too_far_back           // if dist > dmax, set up msg/mode = bad and break
+#endif
+
+       ldr             r1, dist_loc                            // dist
+       rsb             r3, ip, out                                     // (out - beg);
+       addhi   bits, bits, #8                          // this is the internal bits += 8 from above
+
+       cmp             r1, r3                                          // dist vs (out - beg) 
+
+       mov             hold, hold, lsr r2                      // hold >>= op ;
+       rsb             bits, r2, bits                          // bits -= op;
+       rsbls   r2, r1, out                                     // if (dist<=op) r2 = from = out-dist
+       bls             copy_direct_from_output         // if (dist<=op) branch to copy_direct_from_output
+
+       ldr             r2, whave_loc                                   // whave
+       rsb             r1, r3, r1                                              // op = dist-op
+       cmp             r2, r1                                                  // whave vs op
+       nop                                                                             // pad dummy for better performance
+       bcc             invalid_distance_too_far_back   // if whave < op,  message invalid distance too far back, and break
+
+       cmp             write, #0                                               // write == 0 ?
+       bne             non_very_common_case                    // if (write != 0) branch to non_very_common_case
+
+       // the following : if (write == 0) { /* very common case */ }
+       nop                                                                             // pad dummy for better performance
+       ldr             ip, wsize_loc                                   // wsize
+       cmp             r6, r1                                                  // len vs op 
+       rsb             r3, r1, ip                                              // wsize - op
+       ldr             ip, windowm1_loc                                // window - 1
+       add             r2, ip, r3                                              // from = window - 1 + wsize - op : setup for using PUP(from)
+       movhi   r3, r1                                                  // if len > op, r3 = op
+       movhi   r1, out                                                 // if len > op, r1 = out
+       bhi             some_from_window                                // if (len > op), branch to some_from_window
+
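+       // The tests above select the copy source as in inffast.c: from the output
+       // itself, or (partly) from the sliding window. Abridged C for the path set up
+       // here (write == 0, the very common case):
+       //
+       //      op = (unsigned)(out - beg);     /* max distance in output */
+       //      if (dist > op) {                /* see if copy from window */
+       //              op = dist - op;         /* distance back in window */
+       //              if (op > whave) { /* invalid distance too far back -> BAD */ }
+       //              from = window - OFF;
+       //              if (write == 0) {       /* very common case */
+       //                      from += wsize - op;
+       //                      if (op < len) { /* some from window */
+       //                              len -= op;
+       //                              do { PUP(out) = PUP(from); } while (--op);
+       //                              from = out - dist;      /* rest from output */
+       //                      }
+       //              }
+       //      }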
+finish_copy:
+
+       //      while (len > 2) { 
+       //              PUP(out) = PUP(from); 
+       //              PUP(out) = PUP(from); 
+       //              PUP(out) = PUP(from); 
+       //              len -= 3; 
+       //      } 
+       //      if (len) { 
+       //              PUP(out) = PUP(from); 
+       //              if (len > 1) 
+       //              PUP(out) = PUP(from); 
+       //      }
+
+       cmp             r6, #2                                                  // len > 2 ?
+       movls   r1, r6                                                  // if (len<=2) r1 = len
+       bls             lenle2                                                  // if (len<=2) branch to lenle2
+       mov             r1, r6
+fcopy_per3bytes:
+       ldrb    r3, [r2, #1]                                    // 1st PUP(from)
+       sub             r1, r1, #3                                              // len-=3
+       cmp             r1, #2                                                  // len > 2 ?
+       strb    r3, [out, #1]                                   // 1st PUP(out) = PUP(from);
+       ldrb    r3, [r2, #2]                                    // 2nd PUP(from)
+       add             r2, r2, #3                                              // from+=3
+       strb    r3, [out, #2]                                   // 2nd PUP(out) = PUP(from);
+       ldrb    r3, [r2, #0]                                    // 3rd PUP(from)
+       add             out, out, #3                                    // out+=3
+       strb    r3, [out, #0]                                   // 3rd PUP(out) = PUP(from);
+       bgt             fcopy_per3bytes                                 // while (len > 2) back to loop head
+lenle2:
+       cmp             r1, #0                                                  // len
+       beq             do_loop_while                                   // back to while loop head if len==0    
+       ldrb    r3, [r2, #1]                                    // PUP(from)
+       cmp             r1, #2                                                  // check whether len==2
+       strb    r3, [out, #1]!                                  // PUP(out) = PUP(from);
+       bne             do_loop_while                                   // back to while loop head if len==1 
+       ldrb    r3, [r2, #2]                                    // 2nd PUP(from)
+       strb    r3, [out, #1]!                                  // 2nd PUP(out) = PUP(from);
+       b               do_loop_while                                   // back to while loop head
+
+end_of_block:
+       tst             r2, #32                                         // if (op&32)
+       movne   r3, #11                                         //   TYPE == 11 in the inflate_mode enum
+       strne   r3, state_mode                          // state->mode = TYPE
+       bne             update_state_and_return         // break the do loop and branch to get ready to return
+       ldr             r3, messages                            // "invalid literal/length code" message
+L75:
+       add             r3, pc, r3
+       str             r3, [strm, #24]                         // strm->msg = (char *)"invalid literal/length code";
+       mov             r3, #27                                         // BAD == 27 in the inflate_mode enum
+       str             r3, state_mode                          // state->mode = BAD;
+       b               update_state_and_return         // break the do loop and branch to get ready to return
+
+//Read_2_bytes:
+//     ldrh    r3,[in,#1]                                      // 2 (PUP(in)) together
+//     add             in, #2                                          // 2 in++
+//     add             hold, hold, r3, asl bits        // twice hold += (unsigned long)(PUP(in)) << bits;
+//     add             bits, #16                                       // 2 bits += 8;
+//     b               dodist                                          // branch to dodist 
+       nop                                                                     // a pad dummy instruction to give better performance
+
+copy_direct_from_output:                               // r2 = from = out - dist ;
+
+                                                                               // do {
+       ldrb    r3, [r2, #1]                            //      1st PUP(from)
+       sub             r6, r6, #3                                      //      len-=3
+       cmp             r6, #2                                          //      len vs 2
+       strb    r3, [out, #1]                           //      1st PUP(out) = PUP(from);
+       ldrb    r3, [r2, #2]                            //      2nd PUP(from)
+       add             r2, r2, #3                                      //      update from+=3
+       strb    r3, [out, #2]                           //      2nd PUP(out) = PUP(from);
+       ldrb    r3, [r2, #0]                            //      3rd PUP(from);
+       add             out, out, #3                            //      update out+=3
+       strb    r3, [out, #0]                           //      3rd PUP(out) = PUP(from);
+       bhi             copy_direct_from_output         // while (len>2);
+
+       // len in r6 can now be 0 1 or 2
+
+       subs    r6,#1                                           // len--;
+    ldrb    r3, [r2, #1]                               // PUP(from)
+    blt     do_loop_while                              // if len<0 back to while loop head
+    strb    r3, [out, #1]!                             // PUP(out) = PUP(from);
+    subs    r6, #1                                             // len--;
+    ldrb    r3, [r2, #2]                               // 2nd PUP(from)
+    blt     do_loop_while                              // if len<0 back to while loop head
+    strb    r3, [out, #1]!                             // 2nd PUP(out) = PUP(from);
+    b       do_loop_while                              // back to while loop head
+
+
+invalide_distance_code:
+       ldr             r3, messages+4                          // "invalid distance code"
+L72:
+       add             r3, pc, r3
+       str             r3, [strm, #24]                         // strm->msg = (char *)"invalid distance code";
+       mov             r3, #27
+       str             r3, state_mode                          // state->mode = BAD;
+       b               update_state_and_return         // break, restore registers, and return
+
+
+some_from_window:
+       add             out, r3, out                            // out += op
+       rsb             r6, r3, r6                                      // len -= op 
+some_from_window_loop:                                 // do {
+       ldrb    ip, [r2, #1]!                           //              PUP(from);
+       subs    r3, r3, #1                                      //              --op    
+       strb    ip, [r1, #1]!                           //              PUP(out) = PUP(from);
+       bne             some_from_window_loop           // } while(op);
+       ldr             r3, dist_loc                            // dist
+       rsb             r2, r3, out                                     // from = out - dist;
+       b               finish_copy
+
+non_very_common_case:
+       cmp             write, r1                                       // write vs op
+       nop                                                                     // pad dummy for better performance
+       bcs             contiguous_in_window            // if (write >= op) branch to contiguous_in_window
+
+       /* wrap around window */
+
+       ldr             r2, wsize_loc                           // wsize
+       ldr             ip, windowm1_loc                        // window-1
+       add             r3, write, r2                           // r3 = wsize+write
+       rsb             r3, r1, r3                                      // r3 = wsize+write-op
+       add             r2, ip, r3                                      // r2 = from = wsize+write-op+window-1;
+       rsb             r1, write, r1                           // op -= write;
+
+       cmp             r6, r1                                          // len vs op
+       bls             finish_copy                                     // if (len <= op) branch to finish_copy
+       rsb             r6, r1, r6                                      // len -= op
+waw_loop:                                                              // do {
+       ldrb    r3, [r2, #1]!                           //      PUP(from)
+       subs    r1, r1, #1                                      //  --op; 
+       strb    r3, [out, #1]!                          //  PUP(out) = PUP(from);
+       bne             waw_loop                                        // } while (op); 
+
+       cmp             write, r6                                       // write vs len
+       ldrcs   r2, windowm1_loc                        // if (write>=len) r2 = from = window-1;
+       bcs             finish_copy                                     // if (write>=len) branch to finish_copy
+
+       // some from start of window
+
+       mov             r1, write                               // op = write
+       sub             r6, write                               // len -= op
+       sub             ip, out                                 // ip = (window-1) - out
+       add             ip, #1                                  // ip = window - out, so out+ip -> from (start of window)
+sow_loop:                                                      // do { 
+       ldrb    r3,[out, ip]                    //      PUP(from)
+       subs    r1, #1                                  //  --op;
+       strb    r3, [out,#1]!                   //  PUP(out) = PUP(from);
+       bne             sow_loop                                // } while (op);
+
+       ldr             r2, dist_loc                    // dist
+       sub             r6, r6, write                   // len -= write 
+       rsb             r2, r2, out                             // r2 = from = out-dist
+       b               finish_copy                             // continue to finish_copy
+
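+       // The wrap-around path above corresponds to this branch in inffast.c:
+       //
+       //      else if (write < op) {          /* wrap around window */
+       //              from += wsize + write - op;
+       //              op -= write;
+       //              if (op < len) {         /* some from end of window */
+       //                      len -= op;
+       //                      do { PUP(out) = PUP(from); } while (--op);
+       //                      from = window - OFF;
+       //                      if (write < len) {      /* some from start of window */
+       //                              op = write;  len -= op;
+       //                              do { PUP(out) = PUP(from); } while (--op);
+       //                              from = out - dist;      /* rest from output */
+       //                      }
+       //              }
+       //      }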
+
+contiguous_in_window:
+       ldr             ip, windowm1_loc                // window-1
+       cmp             r6, r1                                  // len vs op
+       rsb             r3, r1, write                   // r3 = write-op
+       add             r2, ip, r3                              // r2 = from = window+write-op-1
+       bls             finish_copy                             // if (len <= op) branch to finish_copy
+       rsb             r6, r1, r6                              // len -= op 
+       ldr             r3, dist_loc                    // dist
+ciw_loop:
+       ldrb    ip, [r2, #1]!                   // PUP(from)
+       subs    r1, r1, #1                              // op--
+       strb    ip, [out, #1]!                  // PUP(out) = PUP(from);
+       bne             ciw_loop                                // while (--op); 
+       rsb             r2, r3, out                             // from = out - dist;
+       b               finish_copy
+
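+       // ... and contiguous_in_window is the remaining branch of that if/else chain
+       // in inffast.c:
+       //
+       //      else {                          /* contiguous in window */
+       //              from += write - op;
+       //              if (op < len) {         /* some from window */
+       //                      len -= op;
+       //                      do { PUP(out) = PUP(from); } while (--op);
+       //                      from = out - dist;      /* rest from output */
+       //              }
+       //      }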
+invalid_distance_too_far_back:
+       ldr             r3, messages+8                                  // "invalid distance too far back"
+L42:
+       add             r3, pc, r3
+       str             r3, [strm, #24]                                 // strm->msg = (char *)"invalid distance too far back";
+       mov             r3, #27
+       str             r3, state_mode                                  // state->mode = BAD;
+       b               update_state_and_return                 // break, restore registers, and return
+
+       .align 2
+messages:
+       .long   LC2-8-(L75)
+       .long   LC1-8-(L72)
+       .long   LC0-8-(L42)
+
+#endif // defined _ARM_ARCH_6
index 82d2795c0d9ebd8f432756ea5e012728bbd13a28..54f0ee81505d6dac8468eeda3772c08c80cdc8e8 100644 (file)
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
+
+#if defined _ARM_ARCH_6
+
+       // dummy definition: for armv6 and above, the code is built from the assembly version in inffastS.s instead
+       typedef char DummyDefinition;
+
+#else  // architecture
+
 #include "zutil.h"
 #include "inftrees.h"
 #include "inflate.h"
@@ -343,3 +351,5 @@ unsigned start;         /* inflate()'s starting value for strm->avail_out */
  */
 
 #endif /* !ASMINF */
+
+#endif // architecture
index 0366b6215a9786482964624710c221bb358dfc3e..60c9bee2f785ca679f34f6bb94da0b7ffdffee92 100644 (file)
@@ -250,7 +250,7 @@ ifeq (-arch armv6,$(ARCH_FLAGS_ARM))
 CFLAGS_ARM             += -mthumb
 endif
 ifeq (-arch armv5,$(ARCH_FLAGS_ARM))
-CFLAGS_ARM             += -mthumb
+CFLAGS_ARM             += -mno-thumb
 endif
 ifeq (-arch xscale,$(ARCH_FLAGS_ARM))
 CFLAGS_ARM             += -mthumb
@@ -394,7 +394,8 @@ export LDFLAGS_KERNEL_ARM     = \
        -Wl,-new_linker \
        -Wl,-pagezero_size,0x0 \
        -Wl,-segaddr,__HIB,0xC0000000 \
-       -Wl,-image_base,0xC0008000
+       -Wl,-image_base,0xC0008000 \
+       -Wl,-exported_symbols_list,$(TARGET)/kernel-kpi.exp
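+# For context: ld64's -exported_symbols_list takes a plain-text file of global
+# symbol names, one per line; lines starting with # are comments and * wildcards
+# are allowed. A hypothetical fragment of the generated kernel-kpi.exp (the real
+# file is assembled by the config/ Makefile from the .exports lists) might be:
+#
+#      _OSMalloc
+#      _OSFree
+#      _IOLog
+#      __ZN8OSObject*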
 
 
 export LDFLAGS_KERNEL  = $(LDFLAGS_KERNEL_GEN) \
index 618a7849f4aa60491ee1bbecd5d0cfce3805e133..3ba71308348ca474b9e39ee972677bdf311cd9ad 100644 (file)
@@ -570,6 +570,7 @@ do_build_mach_kernel: $(TARGET)/kgmacros $(TARGET)/mach_kernel
 
 $(TARGET)/mach_kernel: $(addprefix $(TARGET)/,$(foreach component,$(COMPONENT_LIST), $(addprefix $(component)/$(firstword $($(addsuffix _KERNEL_CONFIG, $(shell printf $(component) | tr a-z A-Z))) $(KERNEL_CONFIG))/, $(addsuffix .o, $(component))))) lastkernelconstructor.o
        $(_v)${MAKE} version.o
+       $(_v)${MAKE} build_mach_kernel_exports
        @echo LD mach_kernel.sys
        $(_v)$(CAT) $(addprefix $(TARGET)/,$(foreach component,$(COMPONENT_LIST), $(addprefix $(component)/$(firstword $($(addsuffix _KERNEL_CONFIG, $(shell printf $(component) | tr a-z A-Z))) $(KERNEL_CONFIG))/, $(addsuffix .o, $(component))))) > mach_kernel.filelist
        $(_v)$(LD) $(LDFLAGS_KERNEL) -filelist mach_kernel.filelist version.o lastkernelconstructor.o `if [ -e $(STATIC_KMODS) ]; then echo $(STATIC_KMODS); fi` \
@@ -606,6 +607,14 @@ lastkernelconstructor.o: $(SRCROOT)/libsa/lastkernelconstructor.c
 $(TARGET)/kgmacros: $(SRCROOT)/kgmacros
        $(_v)$(INSTALL) $(INSTALL_FLAGS) $? $@
 
+.PHONY: build_mach_kernel_exports
+build_mach_kernel_exports:
+       $(_v)${MAKE}                                    \
+               MAKEFILES=${SOURCE}/config/Makefile     \
+               SOURCE=${SOURCE}/config                 \
+               TARGET=$${TARGET}                       \
+       build_mach_kernel_exports;
+
 # Special rules to install machine configuration variants
 
 $(DSTROOT)$(INSTALL_FILE_DIR)mach.$(KERNEL_CONFIG_LC).$(MACHINE_CONFIG_LC): $(TARGET)/mach_kernel force_file_install
 # Special rules to install machine configuration variants
 
 $(DSTROOT)$(INSTALL_FILE_DIR)mach.$(KERNEL_CONFIG_LC).$(MACHINE_CONFIG_LC): $(TARGET)/mach_kernel force_file_install
index 76e39eb65f95e286fb506448b8a06f5f8d0094ea..cadb1a9761cbef002b7f81e9d557a2fa98efa968 100644 (file)
@@ -230,6 +230,10 @@ options   CONFIG_EMBEDDED                       # <config_embedded>
 #
 options   CONFIG_ENFORCE_SIGNED_CODE           # <config_embedded>
 
+# support dynamic signing of code
+#
+options                CONFIG_DYNAMIC_CODE_SIGNING     # <dynamic_codesigning>
+
 # vc_progress_white - make the progress gear white instead of black
 options          CONFIG_VC_PROGRESS_WHITE              # <vc_progress_white>
 
index 29fa14890254216eb551ba9362fb28f2c2ecb0ae..ca0acb6d12352b821a69f08bf11b618247c76d61 100644 (file)
@@ -35,6 +35,7 @@
 #include <sys/errno.h>
 #include <string.h>
 #include <machine/machlimits.h>
+#include <pexpert/pexpert.h>
 
 extern struct vc_info vinfo;
 extern boolean_t panicDialogDesired;
@@ -51,7 +52,6 @@ static int panic_dialog_verify( const struct panicimage * data, unsigned int siz
 static int pixels_needed_to_blit_digit( int digit );
 static void blit_digit( int digit );
 static const char * strnstr(const char * s, const char * find, size_t slen);
-void dim_screen(void);
 static void panic_blit_rect(unsigned int x, unsigned int y, unsigned int width,
                            unsigned int height, int transparent,
                            const unsigned char * dataPtr);
@@ -839,40 +839,6 @@ decode_rle(const unsigned char *dataPtr, unsigned int *quantity,
 }
 
 
-void 
-dim_screen(void)
-{
-       unsigned int *p, *endp, *row;
-       int      col, rowline, rowlongs;
-       register unsigned int mask;
-
-       if(!vinfo.v_depth)
-               return;
-
-       if ( vinfo.v_depth == 32 )
-               mask = 0x007F7F7F;
-       else if ( vinfo.v_depth == 30 )
-               mask = (0x1ff<<20) | (0x1ff<<10) | 0x1ff;
-       else if ( vinfo.v_depth == 16 )
-               mask = 0x3DEF3DEF;
-       else
-               return;
-
-       rowline = (int)(vinfo.v_rowscanbytes / 4);
-       rowlongs = (int)(vinfo.v_rowbytes / 4);
-
-       p = (unsigned int*) vinfo.v_baseaddr;
-       endp = p + (rowlongs * vinfo.v_height);
-
-       for (row = p ; row < endp ; row += rowlongs) {
-               for (p = &row[0], col = 0; col < rowline; col++) {
-                       *p = (*p >> 1) & mask;
-                       ++p;
-               }
-       }
-}
-
-
 /* From user mode Libc - this ought to be in a library */
 static const char *
 strnstr(const char * s, const char * find, size_t slen)
index 49dc6da916fa6fad146f5bb5bad846014d0d68e4..8c0dc3bf25e7a85d071638734ddbc832a76a8c91 100644 (file)
@@ -2506,6 +2506,39 @@ initialize_screen(PE_Video * boot_vinfo, unsigned int op)
 #endif /* GRATEFULDEBUGGER */
 }
 
+void 
+dim_screen(void)
+{
+       unsigned int *p, *endp, *row;
+       int      col, rowline, rowlongs;
+       register unsigned int mask;
+
+       if(!vinfo.v_depth)
+               return;
+
+       if ( vinfo.v_depth == 32 )
+               mask = 0x007F7F7F;
+       else if ( vinfo.v_depth == 30 )
+               mask = (0x1ff<<20) | (0x1ff<<10) | 0x1ff;
+       else if ( vinfo.v_depth == 16 )
+               mask = 0x3DEF3DEF;
+       else
+               return;
+
+       rowline = (int)(vinfo.v_rowscanbytes / 4);
+       rowlongs = (int)(vinfo.v_rowbytes / 4);
+
+       p = (unsigned int*) vinfo.v_baseaddr;
+       endp = p + (rowlongs * vinfo.v_height);
+
+       for (row = p ; row < endp ; row += rowlongs) {
+               for (p = &row[0], col = 0; col < rowline; col++) {
+                       *p = (*p >> 1) & mask;
+                       ++p;
+               }
+       }
+}
+
 void vcattach(void); /* XXX gcc 4 warning cleanup */
 
 void
index 204a85ab6412c98e3bb05d7fe0dd037d5d8b038d..c623ba72eb04b17f7fe44a98a2add6d50e1142e1 100644 (file)
@@ -155,7 +155,7 @@ typedef struct _cframe_t {
 static unsigned panic_io_port;
 static unsigned        commit_paniclog_to_nvram;
 
-int debug_boot_arg;
+unsigned int debug_boot_arg;
 
 void
 machine_startup(void)
@@ -167,13 +167,14 @@ machine_startup(void)
             halt_in_debugger = halt_in_debugger ? 0 : 1;
 #endif
 
-       if (PE_parse_boot_argn("debug", &boot_arg, sizeof (boot_arg))) {
-               if (boot_arg & DB_HALT) halt_in_debugger=1;
-               if (boot_arg & DB_PRT) disable_debug_output=FALSE; 
-               if (boot_arg & DB_SLOG) systemLogDiags=TRUE; 
-               if (boot_arg & DB_NMI) panicDebugging=TRUE; 
-               if (boot_arg & DB_LOG_PI_SCRN) logPanicDataToScreen=TRUE;
-               debug_boot_arg = boot_arg;
+       if (PE_parse_boot_argn("debug", &debug_boot_arg, sizeof (debug_boot_arg))) {
+               if (debug_boot_arg & DB_HALT) halt_in_debugger=1;
+               if (debug_boot_arg & DB_PRT) disable_debug_output=FALSE; 
+               if (debug_boot_arg & DB_SLOG) systemLogDiags=TRUE; 
+               if (debug_boot_arg & DB_NMI) panicDebugging=TRUE; 
+               if (debug_boot_arg & DB_LOG_PI_SCRN) logPanicDataToScreen=TRUE;
+       } else {
+               debug_boot_arg = 0;
        }
 
        if (!PE_parse_boot_argn("nvram_paniclog", &commit_paniclog_to_nvram, sizeof (commit_paniclog_to_nvram)))
@@ -714,13 +715,11 @@ panic_io_port_read(void) {
 /* For use with the MP rendezvous mechanism
  */
 
-#if !CONFIG_EMBEDDED
 static void
 machine_halt_cpu(__unused void *arg) {
        panic_io_port_read();
        pmCPUHalt(PM_HALT_DEBUG);
 }
-#endif
 
 void
 Debugger(
@@ -762,7 +761,7 @@ Debugger(
 #endif
 
                /* Print backtrace - callee is internally synchronized */
-               panic_i386_backtrace(stackptr, 20, NULL, FALSE, NULL);
+               panic_i386_backtrace(stackptr, 32, NULL, FALSE, NULL);
 
                /* everything should be printed now so copy to NVRAM
                 */
@@ -819,23 +818,28 @@ Debugger(
                        }
                     }
                 }
-               draw_panic_dialog();
+
+               /* If the user won't be able to read the dialog,
+                * don't bother trying to show it
+                */
+               if (!PE_reboot_on_panic())
+                       draw_panic_dialog();
 
                if (!panicDebugging) {
                        /* Clear the MP rendezvous function lock, in the event
                         * that a panic occurred while in that codepath.
                         */
                        mp_rendezvous_break_lock();
-#if CONFIG_EMBEDDED
-                       PEHaltRestart(kPEPanicRestartCPU);
-#else
+                       if (PE_reboot_on_panic()) {
+                               PEHaltRestart(kPEPanicRestartCPU);
+                       }
+
                        /* Force all CPUs to disable interrupts and HLT.
                         * We've panicked, and shouldn't depend on the
                         * PEHaltRestart() mechanism, which relies on several
                         * bits of infrastructure.
                         */
                        mp_rendezvous_no_intrs(machine_halt_cpu, NULL);
-#endif
                        /* NOT REACHED */
                }
         }
index 470e8a3e73cdbb347f21c606f32d0ecb03989144..58791ecb83a7b5e7a803384f06c64f5698c21cce 100644 (file)
@@ -52,7 +52,6 @@
 #define        k64Bit                          0x00000200      /* processor supports EM64T (not what mode you're running in) */
 #define        kHasSSE4_1                      0x00000400
 #define        kHasSSE4_2                      0x00000800
-#define        kHasAES                         0x00001000
 #define        kInOrderPipeline                0x00002000      /* in-order execution */
 #define        kSlow                           0x00004000      /* tsc < nanosecond */
 #define        kUP                             0x00008000      /* set if (kNumCPUs == 1) */
index c247a157df5b7f6459589a9dd4a53d2b29e51bc4..1ddb1469e0b645033961add91e439f5aa1c77af9 100644 (file)
@@ -573,6 +573,7 @@ cpuid_set_generic_info(i386_cpu_info_t *info_p)
                cpuid_fn(6, reg);
                ctp->sensor               = bitfield32(reg[eax], 0, 0);
                ctp->dynamic_acceleration = bitfield32(reg[eax], 1, 1);
+               ctp->invariant_APIC_timer = bitfield32(reg[eax], 2, 2);
                ctp->thresholds           = bitfield32(reg[ebx], 3, 0);
                ctp->ACNT_MCNT            = bitfield32(reg[ecx], 0, 0);
                info_p->cpuid_thermal_leafp = ctp;
@@ -727,9 +728,9 @@ static struct {
 extfeature_map[] = {
        {CPUID_EXTFEATURE_SYSCALL, "SYSCALL"},
        {CPUID_EXTFEATURE_XD,      "XD"},
+       {CPUID_EXTFEATURE_RDTSCP,  "RDTSCP"},
        {CPUID_EXTFEATURE_EM64T,   "EM64T"},
        {CPUID_EXTFEATURE_LAHF,    "LAHF"},
-       {CPUID_EXTFEATURE_RDTSCP,  "RDTSCP"},
        {CPUID_EXTFEATURE_TSCI,    "TSCI"},
        {0, 0}
 };
index 135ededc36f7ef0122995c6d47e9d11bf7c20c4a..32b07e12ab4a6440a42720de7ee774ec3fb83ce8 100644 (file)
@@ -84,6 +84,7 @@
 #define CPUID_FEATURE_PBE     _Bit(31) /* Pend Break Enable */
 
 #define CPUID_FEATURE_SSE3    _HBit(0) /* Streaming SIMD extensions 3 */
+
 #define CPUID_FEATURE_MONITOR _HBit(3) /* Monitor/mwait */
 #define CPUID_FEATURE_DSCPL   _HBit(4) /* Debug Store CPL */
 #define CPUID_FEATURE_VMX     _HBit(5) /* VMX */
@@ -95,6 +96,7 @@
 #define CPUID_FEATURE_CX16    _HBit(13)        /* CmpXchg16b instruction */
 #define CPUID_FEATURE_xTPR    _HBit(14)        /* Send Task PRiority msgs */
 #define CPUID_FEATURE_PDCM    _HBit(15)        /* Perf/Debug Capability MSR */
+
 #define CPUID_FEATURE_DCA     _HBit(18)        /* Direct Cache Access */
 #define CPUID_FEATURE_SSE4_1  _HBit(19)        /* Streaming SIMD extensions 4.1 */
 #define CPUID_FEATURE_SSE4_2  _HBit(20)        /* Streaming SIMD extensions 4.2 */
  */
 #define CPUID_EXTFEATURE_SYSCALL   _Bit(11)    /* SYSCALL/sysret */
 #define CPUID_EXTFEATURE_XD       _Bit(20)     /* eXecute Disable */
+
 #define CPUID_EXTFEATURE_RDTSCP           _Bit(27)     /* RDTSCP */
 #define CPUID_EXTFEATURE_EM64T    _Bit(29)     /* Extended Mem 64 Technology */
 
-#define CPUID_EXTFEATURE_LAHF     _HBit(20)    /* LAFH/SAHF instructions */
+#define CPUID_EXTFEATURE_LAHF     _HBit(0)     /* LAHF/SAHF instructions */
 
 /*
  * The CPUID_EXTFEATURE_XXX values define 64-bit values
 #define CPUID_MODEL_MEROM      15
 #define CPUID_MODEL_PENRYN     23
 #define CPUID_MODEL_NEHALEM    26
-#define CPUID_MODEL_ATOM       28
 #define CPUID_MODEL_FIELDS     30      /* Lynnfield, Clarksfield, Jasper */
 #define CPUID_MODEL_DALES      31      /* Havendale, Auburndale */
 #define CPUID_MODEL_NEHALEM_EX 46
@@ -200,6 +202,7 @@ typedef struct {
 typedef struct {
        boolean_t       sensor;
        boolean_t       dynamic_acceleration;
+       boolean_t       invariant_APIC_timer;
        uint32_t        thresholds;
        boolean_t       ACNT_MCNT;
 } cpuid_thermal_leaf_t;
index 0206d098697420782370ee646559ea0b8a894281..21e974bff2763ca898cbcd4ac78d4bb97bb804b5 100644 (file)
@@ -37,6 +37,7 @@
 #include <kern/cpu_data.h>
 #include <kern/assert.h>
 #include <kern/machine.h>
+#include <kern/debug.h>
 
 #include <vm/vm_map.h>
 #include <vm/vm_kern.h>
@@ -90,12 +91,11 @@ static unsigned lapic_master_error_count = 0;
 static unsigned lapic_error_count_threshold = 5;
 static boolean_t lapic_dont_panic = FALSE;
 
-extern int     debug_boot_arg;
-
 /* Base vector for local APIC interrupt sources */
 int lapic_interrupt_base = LAPIC_DEFAULT_INTERRUPT_BASE;
 
-int            lapic_to_cpu[MAX_CPUS];
+#define                MAX_LAPICIDS    (LAPIC_ID_MAX+1)
+int            lapic_to_cpu[MAX_LAPICIDS];
 int            cpu_to_lapic[MAX_CPUS];
 
 static void
@@ -103,15 +103,17 @@ lapic_cpu_map_init(void)
 {
        int     i;
 
-       for (i = 0; i < MAX_CPUS; i++) {
-               lapic_to_cpu[i] = -1;
+       for (i = 0; i < MAX_CPUS; i++)
                cpu_to_lapic[i] = -1;
-       }
+       for (i = 0; i < MAX_LAPICIDS; i++)
+               lapic_to_cpu[i] = -1;
 }
 
 void
 lapic_cpu_map(int apic_id, int cpu)
 {
+       assert(apic_id < MAX_LAPICIDS);
+       assert(cpu < MAX_CPUS);
        cpu_to_lapic[cpu] = apic_id;
        lapic_to_cpu[apic_id] = cpu;
 }
@@ -137,7 +139,7 @@ ml_get_apicid(uint32_t cpu)
 uint32_t
 ml_get_cpuid(uint32_t lapic_index)
 {
-       if(lapic_index >= (uint32_t)MAX_CPUS)
+       if(lapic_index >= (uint32_t)MAX_LAPICIDS)
                return 0xFFFFFFFF;      /* Return -1 if cpu too big */
        
        /* Return the cpu ID (or -1 if not configured) */
@@ -158,7 +160,7 @@ lapic_cpu_map_dump(void)
                kprintf("cpu_to_lapic[%d]: %d\n",
                        i, cpu_to_lapic[i]);
        }
-       for (i = 0; i < MAX_CPUS; i++) {
+       for (i = 0; i < MAX_LAPICIDS; i++) {
                if (lapic_to_cpu[i] == -1)
                        continue;
                kprintf("lapic_to_cpu[%d]: %d\n",
index 6df816c557bfa57eb0540d3685d0329d857dac76..27f1fb52bf573198cc8a8a328d8495e164104061 100644 (file)
@@ -786,10 +786,6 @@ copyio(int copy_type, user_addr_t user_addr, char *kernel_addr,
        }
         pmap = thread->map->pmap;
 
-#if CONFIG_DTRACE
-       thread->machine.specFlags |= CopyIOActive;
-#endif /* CONFIG_DTRACE */
-
         if (pmap == kernel_pmap || use_kernel_map) {
 
                kern_vaddr = (vm_offset_t)user_addr;
@@ -819,13 +815,18 @@ copyio(int copy_type, user_addr_t user_addr, char *kernel_addr,
                KERNEL_DEBUG(debug_type | DBG_FUNC_END, (unsigned)kern_vaddr,
                             (unsigned)kernel_addr, (unsigned)nbytes,
                             error | 0x80000000, 0);
+               return (error);
+       }
 
 #if CONFIG_DTRACE
 
 #if CONFIG_DTRACE
-       thread->machine.specFlags &= ~CopyIOActive;
+       thread->machine.specFlags |= CopyIOActive;
 #endif /* CONFIG_DTRACE */
 
 #endif /* CONFIG_DTRACE */
 
-               return (error);
+       if ((nbytes && (user_addr + nbytes <= user_addr)) || ((user_addr + nbytes) > vm_map_max(thread->map))) {
+               error = EFAULT;
+               goto done;
        }
+
        user_base = user_addr & ~((user_addr_t)(NBPDE - 1));
        user_offset = (vm_offset_t)(user_addr & (NBPDE - 1));
 
        user_base = user_addr & ~((user_addr_t)(NBPDE - 1));
        user_offset = (vm_offset_t)(user_addr & (NBPDE - 1));
 
@@ -1029,6 +1030,8 @@ copyio_phys(addr64_t source, addr64_t sink, vm_size_t csize, int which)
        }
        window_offset = (char *)((uint32_t)paddr & (PAGE_SIZE - 1));
 
        }
        window_offset = (char *)((uint32_t)paddr & (PAGE_SIZE - 1));
 
+       assert(!((current_thread()->machine.specFlags & CopyIOActive) && ((which & cppvKmap) == 0)));
+
        if (current_thread()->machine.physwindow_busy) {
                pt_entry_t      old_pentry;
 
        if (current_thread()->machine.physwindow_busy) {
                pt_entry_t      old_pentry;
 
index 56fe44b1722b903e98ddc86360c4685c7a5605e5..0efbb917c4609c40fa58e420d71e24afee2cda19 100644 (file)
@@ -113,8 +113,8 @@ machine_idle(void)
 
     if (pmInitDone
        && pmDispatch != NULL
 
     if (pmInitDone
        && pmDispatch != NULL
-       && pmDispatch->cstateMachineIdle != NULL)
-       (*pmDispatch->cstateMachineIdle)(0x7FFFFFFFFFFFFFFFULL);
+       && pmDispatch->MachineIdle != NULL)
+       (*pmDispatch->MachineIdle)(0x7FFFFFFFFFFFFFFFULL);
     else {
        /*
         * If no power management, re-enable interrupts and halt.
     else {
        /*
         * If no power management, re-enable interrupts and halt.
@@ -562,8 +562,10 @@ machine_run_count(uint32_t count)
 }
 
 boolean_t
 }
 
 boolean_t
-machine_cpu_is_inactive(int cpu)
+machine_processor_is_inactive(processor_t processor)
 {
 {
+    int                cpu = processor->cpu_id;
+
     if (pmDispatch != NULL
        && pmDispatch->pmIsCPUUnAvailable != NULL)
        return(pmDispatch->pmIsCPUUnAvailable(cpu_to_lcpu(cpu)));
     if (pmDispatch != NULL
        && pmDispatch->pmIsCPUUnAvailable != NULL)
        return(pmDispatch->pmIsCPUUnAvailable(cpu_to_lcpu(cpu)));
@@ -571,6 +573,43 @@ machine_cpu_is_inactive(int cpu)
        return(FALSE);
 }
 
+processor_t
+machine_choose_processor(processor_set_t pset,
+                        processor_t preferred)
+{
+    int                startCPU;
+    int                endCPU;
+    int                preferredCPU;
+    int                chosenCPU;
+
+    if (!pmInitDone)
+       return(preferred);
+
+    if (pset == NULL) {
+       startCPU = -1;
+       endCPU = -1;
+    } else {
+       startCPU = pset->cpu_set_low;
+       endCPU = pset->cpu_set_hi;
+    }
+
+    if (preferred == NULL)
+       preferredCPU = -1;
+    else
+       preferredCPU = preferred->cpu_id;
+
+    if (pmDispatch != NULL
+       && pmDispatch->pmChooseCPU != NULL) {
+       chosenCPU = pmDispatch->pmChooseCPU(startCPU, endCPU, preferredCPU);
+
+       if (chosenCPU == -1)
+           return(NULL);
+       return(cpu_datap(chosenCPU)->cpu_processor);
+    }
+
+    return(preferred);
+}
+
 static uint32_t
 pmGetSavedRunCount(void)
 {
index 5609df50e3860165846b454a724fd61c6a09d896..ff67de670da85b1f1095733299902a8fa884a57e 100644 (file)
@@ -38,7 +38,7 @@
  * This value should be changed each time that pmDispatch_t or pmCallBacks_t
  * changes.
  */
-#define PM_DISPATCH_VERSION    18
+#define PM_DISPATCH_VERSION    19
 
 /*
  * Dispatch table for functions that get installed when the power
@@ -54,7 +54,7 @@ typedef struct
 {
     int                        (*pmCPUStateInit)(void);
     void               (*cstateInit)(void);
-    uint64_t           (*cstateMachineIdle)(uint64_t maxIdleDuration);
+    uint64_t           (*MachineIdle)(uint64_t maxIdleDuration);
     uint64_t           (*GetDeadline)(x86_lcpu_t *lcpu);
     uint64_t           (*SetDeadline)(x86_lcpu_t *lcpu, uint64_t);
     void               (*Deadline)(x86_lcpu_t *lcpu);
@@ -75,6 +75,7 @@ typedef struct
     void               (*markAllCPUsOff)(void);
     void               (*pmSetRunCount)(uint32_t count);
     boolean_t          (*pmIsCPUUnAvailable)(x86_lcpu_t *lcpu);
+    int                        (*pmChooseCPU)(int startCPU, int endCPU, int preferredCPU);
     int                        (*pmIPIHandler)(void *state);
 } pmDispatch_t;
 
index 311763f1fbe61db08dd8178d9b154b5adef7ebb0..e7135803a95343ebf4eff3c910988db77b1d2677 100644 (file)
@@ -89,7 +89,6 @@
  */
 
 #include <string.h>
-#include <norma_vm.h>
 #include <mach_kdb.h>
 #include <mach_ldebug.h>
 
@@ -219,143 +218,10 @@ boolean_t pmap_trace = FALSE;
 uint64_t max_preemption_latency_tsc = 0;
 
 
-/*
- *     Private data structures.
- */
-
-/*
- *     For each vm_page_t, there is a list of all currently
- *     valid virtual mappings of that page.  An entry is
- *     a pv_rooted_entry_t; the list is the pv_table.
- *
- *      N.B.  with the new combo rooted/hashed scheme it is
- *      only possibly to remove individual non-rooted entries
- *      if they are found via the hashed chains as there is no
- *      way to unlink the singly linked hashed entries if navigated to
- *      via the queue list off the rooted entries.  Think of it as
- *      hash/walk/pull, keeping track of the prev pointer while walking
- *      the singly linked hash list.  All of this is to save memory and
- *      keep both types of pv_entries as small as possible.
- */
-
-/*
-
-PV HASHING Changes - JK 1/2007
-
-Pve's establish physical to virtual mappings.  These are used for aliasing of a 
-physical page to (potentially many) virtual addresses within pmaps. In the previous 
-implementation the structure of the pv_entries (each 16 bytes in size) was
-
-typedef struct pv_entry {
-    struct pv_entry_t    next;
-    pmap_t                    pmap;
-    vm_map_offset_t   va;
-} *pv_entry_t;
-
-An initial array of these is created at boot time, one per physical page of memory, 
-indexed by the physical page number. Additionally, a pool of entries is created from a 
-pv_zone to be used as needed by pmap_enter() when it is creating new mappings.  
-Originally, we kept this pool around because the code in pmap_enter() was unable to 
-block if it needed an entry and none were available - we'd panic.  Some time ago I 
-restructured the pmap_enter() code so that for user pmaps it can block while zalloc'ing 
-a pv structure and restart, removing a panic from the code (in the case of the kernel 
-pmap we cannot block and still panic, so, we keep a separate hot pool for use only on 
-kernel pmaps).  The pool has not been removed since there is a large performance gain 
-keeping freed pv's around for reuse and not suffering the overhead of zalloc for every new pv we need.
-
-As pmap_enter() created new mappings it linked the new pve's for them off the fixed 
-pv array for that ppn (off the next pointer).  These pve's are accessed for several 
-operations, one of them being address space teardown.  In that case, we basically do this
-
-       for (every page/pte in the space) {
-               calc pve_ptr from the ppn in the pte
-               for (every pv in the list for the ppn) {
-                       if (this pv is for this pmap/vaddr) {
-                               do housekeeping
-                               unlink/free the pv
-                       }
-               }
-       }
-
-The problem arose when we were running, say 8000 (or even 2000) apache or other processes 
-and one or all terminate. The list hanging off each pv array entry could have thousands of 
-entries.  We were continuously linearly searching each of these lists as we stepped through 
-the address space we were tearing down.  Because of the locks we hold, likely taking a cache 
-miss for each node,  and interrupt disabling for MP issues the system became completely 
-unresponsive for many seconds while we did this.
-
-Realizing that pve's are accessed in two distinct ways (linearly running the list by ppn 
-for operations like pmap_page_protect and finding and modifying/removing a single pve as 
-part of pmap_enter processing) has led to modifying the pve structures and databases.
-
-There are now two types of pve structures.  A "rooted" structure which is basically the 
-original structure accessed in an array by ppn, and a ''hashed'' structure accessed on a 
-hash list via a hash of [pmap, vaddr].  These have been designed with the two goals of 
-minimizing wired memory and making the lookup of a ppn faster.  Since a vast majority of 
-pages in the system are not aliased and hence represented by a single pv entry I've kept 
-the rooted entry size as small as possible because there is one of these dedicated for 
-every physical page of memory.  The hashed pve's are larger due to the addition of the hash 
-link and the ppn entry needed for matching while running the hash list to find the entry we 
-are looking for.  This way, only systems that have lots of aliasing (like 2000+ httpd procs) 
-will pay the extra memory price. Both structures have the same first three fields allowing 
-some simplification in the code.
-
-They have these shapes
-
-typedef struct pv_rooted_entry {
-        queue_head_t qlink;
-        vm_map_offset_t va;
-        pmap_t          pmap;
-} *pv_rooted_entry_t;
-
-
-typedef struct pv_hashed_entry {
-  queue_head_t qlink;
-  vm_map_offset_t va;
-  pmap_t        pmap;
-  ppnum_t ppn;
-  struct pv_hashed_entry *nexth;
-} *pv_hashed_entry_t;
-
-The main flow difference is that the code is now aware of the rooted entry and the hashed 
-entries.  Code that runs the pv list still starts with the rooted entry and then continues 
-down the qlink onto the hashed entries.  Code that is looking up a specific pv entry first 
-checks the rooted entry and then hashes and runs the hash list for the match. The hash list 
-lengths are much smaller than the original pv lists that contained all aliases for the specific ppn.
-
-*/
-
-typedef struct pv_rooted_entry {     /* first three entries must match pv_hashed_entry_t */
-        queue_head_t qlink;
-       vm_map_offset_t va;             /* virtual address for mapping */
-       pmap_t          pmap;           /* pmap where mapping lies */
-} *pv_rooted_entry_t;
-
-#define PV_ROOTED_ENTRY_NULL   ((pv_rooted_entry_t) 0)
-
-pv_rooted_entry_t      pv_head_table;          /* array of entries, one per page */
-
-typedef struct pv_hashed_entry {     /* first three entries must match pv_rooted_entry_t */
-  queue_head_t qlink;
-  vm_map_offset_t va;
-  pmap_t        pmap;
-  ppnum_t ppn;
-  struct pv_hashed_entry *nexth;
-} *pv_hashed_entry_t;
-
-#define PV_HASHED_ENTRY_NULL ((pv_hashed_entry_t)0)
-
-#define NPVHASH 4095   /* MUST BE 2^N - 1 */
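To make the two access paths concrete, here is a minimal sketch of a lookup by [pmap, va, ppn] under the new scheme: check the rooted entry first, and only fall back to hashing when the page is aliased. The helper name pv_find is hypothetical; the entry types, pai_to_pvh()/ppn_to_pai(), pvhashidx(), and pvhash() are the ones this commit defines, and the caller is assumed to hold the pv head and hash-bucket locks.

	static pv_hashed_entry_t
	pv_find(pmap_t pmap, vm_map_offset_t va, ppnum_t ppn)
	{
		pv_rooted_entry_t	pv_h = pai_to_pvh(ppn_to_pai(ppn));
		pv_hashed_entry_t	pvh_e;

		/* fast path: an unaliased page is described by its rooted entry alone */
		if (pv_h->pmap == pmap && pv_h->va == va)
			return (pv_hashed_entry_t) pv_h;	/* first three fields match */

		/* slow path: hash [pmap, va] and walk the short singly linked chain */
		for (pvh_e = *pvhash(pvhashidx(pmap, va));
		     pvh_e != PV_HASHED_ENTRY_NULL;
		     pvh_e = pvh_e->nexth) {
			if (pvh_e->pmap == pmap && pvh_e->va == va && pvh_e->ppn == ppn)
				return pvh_e;
		}
		return PV_HASHED_ENTRY_NULL;	/* no such mapping */
	}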
 pv_hashed_entry_t     *pv_hash_table;  /* hash lists */
 
 uint32_t npvhash = 0;
 
-/* #define PV_DEBUG 1   uncomment to enable some PV debugging code */
-#ifdef PV_DEBUG
-#define CHK_NPVHASH() if(0 == npvhash) panic("npvhash uninitialized");
-#else
-#define CHK_NPVHASH()
-#endif
 
 /*
  *	pv_list entries are kept on a list that can only be accessed
@@ -373,53 +239,6 @@ int pv_free_count = 0;
 int pv_hashed_free_count = 0;
 int pv_kern_free_count = 0;
 int pv_hashed_kern_free_count = 0;
-#define PV_HASHED_LOW_WATER_MARK 5000
-#define PV_HASHED_KERN_LOW_WATER_MARK 100
-#define PV_HASHED_ALLOC_CHUNK 2000
-#define PV_HASHED_KERN_ALLOC_CHUNK 50
-thread_call_t  mapping_adjust_call;
-static thread_call_data_t  mapping_adjust_call_data;
-uint32_t mappingrecurse = 0;
-
-#define        PV_HASHED_ALLOC(pvh_e) { \
-       simple_lock(&pv_hashed_free_list_lock); \
-       if ((pvh_e = pv_hashed_free_list) != 0) { \
-         pv_hashed_free_list = (pv_hashed_entry_t)pvh_e->qlink.next;   \
-            pv_hashed_free_count--; \
-            if (pv_hashed_free_count < PV_HASHED_LOW_WATER_MARK) \
-              if (hw_compare_and_store(0,1,(u_int *)&mappingrecurse)) \
-                thread_call_enter(mapping_adjust_call); \
-       } \
-       simple_unlock(&pv_hashed_free_list_lock); \
-}
-
-#define        PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt) {   \
-       simple_lock(&pv_hashed_free_list_lock); \
-       pvh_et->qlink.next = (queue_entry_t)pv_hashed_free_list;        \
-       pv_hashed_free_list = pvh_eh; \
-        pv_hashed_free_count += pv_cnt; \
-       simple_unlock(&pv_hashed_free_list_lock); \
-}
-
-#define        PV_HASHED_KERN_ALLOC(pvh_e) { \
-       simple_lock(&pv_hashed_kern_free_list_lock); \
-       if ((pvh_e = pv_hashed_kern_free_list) != 0) { \
-         pv_hashed_kern_free_list = (pv_hashed_entry_t)pvh_e->qlink.next;      \
-            pv_hashed_kern_free_count--; \
-            if (pv_hashed_kern_free_count < PV_HASHED_KERN_LOW_WATER_MARK) \
-              if (hw_compare_and_store(0,1,(u_int *)&mappingrecurse)) \
-                thread_call_enter(mapping_adjust_call); \
-       } \
-       simple_unlock(&pv_hashed_kern_free_list_lock); \
-}
-
-#define        PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt) {       \
-       simple_lock(&pv_hashed_kern_free_list_lock); \
-       pvh_et->qlink.next = (queue_entry_t)pv_hashed_kern_free_list;   \
-       pv_hashed_kern_free_list = pvh_eh; \
-        pv_hashed_kern_free_count += pv_cnt; \
-       simple_unlock(&pv_hashed_kern_free_list_lock); \
-}
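These macros only pop from and push onto cached free lists; what to do when a list runs dry is the caller's decision. Condensed from the pmap_enter() body removed later in this commit (variable names as there), the pattern is: the kernel pmap falls back to the private hot pool because it may not block, while user pmaps unlock, zalloc, and retry:

	PV_HASHED_ALLOC(pvh_e);
	if (PV_HASHED_ENTRY_NULL == pvh_e) {
		if (kernel_pmap == pmap) {
			PV_HASHED_KERN_ALLOC(pvh_e);	/* may not block */
		} else {
			UNLOCK_PVH(pai);		/* user pmap: blocking is safe */
			PMAP_UNLOCK(pmap);
			pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
			goto Retry;			/* re-run with pvh_new in hand */
		}
	}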
 
 zone_t		pv_hashed_list_zone;	/* zone of pv_hashed_entry structures */
 
@@ -447,23 +266,6 @@ boolean_t  pmap_initialized = FALSE;/* Has pmap_init completed? */
 static struct vm_object kptobj_object_store;
 static vm_object_t kptobj;
 
-/*
- *     Index into pv_head table, its lock bits, and the modify/reference and managed bits
- */
-
-#define pa_index(pa)   (i386_btop(pa))
-#define ppn_to_pai(ppn)        ((int)ppn)
-
-#define pai_to_pvh(pai)                (&pv_head_table[pai])
-#define lock_pvh_pai(pai)      bit_lock(pai, (void *)pv_lock_table)
-#define unlock_pvh_pai(pai)    bit_unlock(pai, (void *)pv_lock_table)
-
-#define pvhashidx(pmap, va) (((uint32_t)pmap ^ ((uint32_t)((uint64_t)va >> PAGE_SHIFT) & 0xFFFFFFFF)) & npvhash)
-#define pvhash(idx)         (&pv_hash_table[idx])
-
-#define lock_hash_hash(hash)           bit_lock(hash, (void *)pv_hash_lock_table)
-#define unlock_hash_hash(hash) bit_unlock(hash, (void *)pv_hash_lock_table)
-
 /*
  *	Array of physical page attributes for managed pages.
  *	One byte per physical page.
@@ -596,44 +398,6 @@ static int nkpt;
 pt_entry_t     *DMAP1, *DMAP2;
 caddr_t         DADDR1;
 caddr_t         DADDR2;
-
-static inline
-void pmap_pvh_unlink(pv_hashed_entry_t pv);
-
-/*
- * unlinks the pv_hashed_entry_t pvh from the singly linked hash chain.
- * properly deals with the anchor.
- * must be called with the hash locked, does not unlock it
- */
-
-static inline
-void pmap_pvh_unlink(pv_hashed_entry_t pvh)
-{
-  pv_hashed_entry_t curh;
-  pv_hashed_entry_t *pprevh;
-  int pvhash_idx;
-
-  CHK_NPVHASH();
-  pvhash_idx = pvhashidx(pvh->pmap, pvh->va);
-
-  pprevh = pvhash(pvhash_idx);
-
-#if PV_DEBUG
-  if (NULL == *pprevh) panic("pvh_unlink null anchor"); /* JK DEBUG */
-#endif
-  curh = *pprevh;
-
-  while (PV_HASHED_ENTRY_NULL != curh) {
-    if (pvh == curh)
-      break;
-    pprevh = &curh->nexth;
-    curh = curh->nexth;
-  }
-  if (PV_HASHED_ENTRY_NULL == curh) panic("pmap_pvh_unlink no pvh");
-  *pprevh = pvh->nexth;
-  return;
-}
-
 /*
  * for legacy, returns the address of the pde entry.
  * for 64 bit, causes the pdpt page containing the pde entry to be mapped,
@@ -1550,7 +1314,7 @@ pmap_create(
                va = (vm_offset_t)p->dirbase;
                p->pdirbase = kvtophys(va);
 
-               template = cpu_64bit ? INTEL_PTE_VALID|INTEL_PTE_RW|INTEL_PTE_USER|INTEL_PTE_REF : INTEL_PTE_VALID;
+               template = INTEL_PTE_VALID;
                for (i = 0; i< NPGPTD; i++, pdpt++ ) {
                        pmap_paddr_t pa;
                        pa = (pmap_paddr_t) kvtophys((vm_offset_t)(va + i386_ptob(i)));
@@ -1588,7 +1352,7 @@ pmap_create(
                /* uber space points to uber mapped kernel */
                s = splhigh();
                pml4p = pmap64_pml4(p, 0ULL);
-               pmap_store_pte((pml4p+KERNEL_UBER_PML4_INDEX),*kernel_pmap->pm_pml4);
+               pmap_store_pte((pml4p+KERNEL_UBER_PML4_INDEX), *kernel_pmap->pm_pml4);
 
 
 		if (!is_64bit) {
@@ -1815,231 +1579,6 @@ pmap_reference(
        }
 }
 
-/*
- *     Remove a range of hardware page-table entries.
- *     The entries given are the first (inclusive)
- *     and last (exclusive) entries for the VM pages.
- *     The virtual address is the va for the first pte.
- *
- *     The pmap must be locked.
- *     If the pmap is not the kernel pmap, the range must lie
- *     entirely within one pte-page.  This is NOT checked.
- *     Assumes that the pte-page exists.
- */
-
-void
-pmap_remove_range(
-       pmap_t                  pmap,
-       vm_map_offset_t         start_vaddr,
-       pt_entry_t              *spte,
-       pt_entry_t              *epte)
-{
-       register pt_entry_t     *cpte;
-       pv_hashed_entry_t       pvh_et = PV_HASHED_ENTRY_NULL;
-       pv_hashed_entry_t       pvh_eh = PV_HASHED_ENTRY_NULL;
-       pv_hashed_entry_t       pvh_e;
-       int                     pvh_cnt = 0;
-       int                     num_removed, num_unwired, num_found;
-       int                     pai;
-       pmap_paddr_t            pa;
-       vm_map_offset_t         vaddr;
-       int                     pvhash_idx;
-       uint32_t                pv_cnt;
-
-       num_removed = 0;
-       num_unwired = 0;
-       num_found   = 0;
-
-       if (pmap != kernel_pmap &&
-           pmap->pm_task_map == TASK_MAP_32BIT &&
-           start_vaddr >= HIGH_MEM_BASE) {
-               /*
-                * The range is in the "high_shared_pde" which is shared
-                * between the kernel and all 32-bit tasks.  It holds
-                * the 32-bit commpage but also the trampolines, GDT, etc...
-                * so we can't let user tasks remove anything from it.
-                */
-               return;
-       }
-
-       /* invalidate the PTEs first to "freeze" them */
-       for (cpte = spte, vaddr = start_vaddr;
-            cpte < epte;
-            cpte++, vaddr += PAGE_SIZE_64) {
-
-           pa = pte_to_pa(*cpte);
-           if (pa == 0)
-               continue;
-           num_found++;
-
-           if (iswired(*cpte))
-               num_unwired++;
-
-           pai = pa_index(pa);
-
-           if (!managed_page(pai)) {
-               /*
-                *      Outside range of managed physical memory.
-                *      Just remove the mappings.
-                */
-               pmap_store_pte(cpte, 0);
-               continue;
-           }
-
-           /* invalidate the PTE */ 
-           pmap_update_pte(cpte, *cpte, (*cpte & ~INTEL_PTE_VALID));
-       }
-
-       if (num_found == 0) {
-               /* nothing was changed: we're done */
-               goto update_counts;
-       }
-
-       /* propagate the invalidates to other CPUs */
-
-       PMAP_UPDATE_TLBS(pmap, start_vaddr, vaddr);
-
-       for (cpte = spte, vaddr = start_vaddr;
-            cpte < epte;
-            cpte++, vaddr += PAGE_SIZE_64) {
-
-           pa = pte_to_pa(*cpte);
-           if (pa == 0)
-               continue;
-
-           pai = pa_index(pa);
-
-           LOCK_PVH(pai);
-
-           pa = pte_to_pa(*cpte);
-           if (pa == 0) {
-             UNLOCK_PVH(pai);
-             continue;
-           }
-             
-           num_removed++;
-
-           /*
-            *  Get the modify and reference bits, then
-            *  nuke the entry in the page table
-            */
-           /* remember reference and change */
-           pmap_phys_attributes[pai] |=
-                   (char)(*cpte & (PHYS_MODIFIED | PHYS_REFERENCED));
-           /* completely invalidate the PTE */
-           pmap_store_pte(cpte, 0);
-
-           /*
-            *  Remove the mapping from the pvlist for
-            *  this physical page.
-            */
-           {
-             pv_rooted_entry_t pv_h;
-             pv_hashed_entry_t *pprevh;
-             ppnum_t ppn = (ppnum_t)pai;
-
-               pv_h = pai_to_pvh(pai);
-               pvh_e = PV_HASHED_ENTRY_NULL;
-               if (pv_h->pmap == PMAP_NULL)
-                   panic("pmap_remove_range: null pv_list!");
-
-               if (pv_h->va == vaddr && pv_h->pmap == pmap) { /* rooted or not */
-                   /*
-                    * Header is the pv_rooted_entry. We can't free that. If there is a queued
-                    * entry after this one we remove that
-                    * from the ppn queue, we remove it from the hash chain
-                    * and copy it to the rooted entry. Then free it instead.
-                    */
-
-                 pvh_e = (pv_hashed_entry_t)queue_next(&pv_h->qlink);
-                 if (pv_h != (pv_rooted_entry_t)pvh_e) {  /* any queued after rooted? */
-                   CHK_NPVHASH();
-                   pvhash_idx = pvhashidx(pvh_e->pmap,pvh_e->va);
-                   LOCK_PV_HASH(pvhash_idx);
-                   remque(&pvh_e->qlink);
-                   {
-                     pprevh = pvhash(pvhash_idx);
-                     if (PV_HASHED_ENTRY_NULL == *pprevh) {
-                       panic("pmap_remove_range empty hash removing rooted pv");
-                     }
-                   }
-                   pmap_pvh_unlink(pvh_e);
-                   UNLOCK_PV_HASH(pvhash_idx);
-                   pv_h->pmap = pvh_e->pmap;
-                   pv_h->va = pvh_e->va;   /* dispose of pvh_e */
-                 } else {  /* none queued after rooted */
-                   pv_h->pmap = PMAP_NULL;
-                   pvh_e = PV_HASHED_ENTRY_NULL;
-                 }   /* any queued after rooted */
-
-               } else { /* rooted or not */
-                 /* not removing rooted pv. find it on hash chain, remove from ppn queue and
-                  * hash chain and free it */
-                 CHK_NPVHASH();
-                 pvhash_idx = pvhashidx(pmap,vaddr);
-                 LOCK_PV_HASH(pvhash_idx);
-                 pprevh = pvhash(pvhash_idx);
-                 if (PV_HASHED_ENTRY_NULL == *pprevh) {
-                   panic("pmap_remove_range empty hash removing hashed pv");
-                   }
-                 pvh_e = *pprevh;
-                 pmap_pv_hashlist_walks++;
-                 pv_cnt = 0;
-                 while (PV_HASHED_ENTRY_NULL != pvh_e) {
-                       pv_cnt++;
-                       if (pvh_e->pmap == pmap && pvh_e->va == vaddr && pvh_e->ppn == ppn) break;
-                       pprevh = &pvh_e->nexth;
-                       pvh_e = pvh_e->nexth;
-                 }
-                 pmap_pv_hashlist_cnts += pv_cnt;
-                 if (pmap_pv_hashlist_max < pv_cnt) pmap_pv_hashlist_max = pv_cnt;
-                 if (PV_HASHED_ENTRY_NULL == pvh_e) panic("pmap_remove_range pv not on hash");
-                 *pprevh = pvh_e->nexth;
-                 remque(&pvh_e->qlink);
-                 UNLOCK_PV_HASH(pvhash_idx);
-
-               } /* rooted or not */
-
-               UNLOCK_PVH(pai);
-
-               if (pvh_e != PV_HASHED_ENTRY_NULL) {
-                 pvh_e->qlink.next = (queue_entry_t)pvh_eh;
-                 pvh_eh = pvh_e;
-
-                 if (pvh_et == PV_HASHED_ENTRY_NULL) {
-                   pvh_et = pvh_e;
-                 }
-
-                 pvh_cnt++;
-               }
-
-           } /* removing mappings for this phy page */
-       } /* for loop */
-       
-       if (pvh_eh != PV_HASHED_ENTRY_NULL) {
-           PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
-       }
-
-update_counts:
-       /*
-        *      Update the counts
-        */
-#if TESTING
-       if (pmap->stats.resident_count < num_removed)
-               panic("pmap_remove_range: resident_count");
-#endif
-       assert(pmap->stats.resident_count >= num_removed);
-       OSAddAtomic(-num_removed,  &pmap->stats.resident_count);
-
-#if TESTING
-       if (pmap->stats.wired_count < num_unwired)
-               panic("pmap_remove_range: wired_count");
-#endif
-       assert(pmap->stats.wired_count >= num_unwired);
-       OSAddAtomic(-num_unwired,  &pmap->stats.wired_count);
-
-       return;
-}
 
 /*
  *	Remove phys addr if mapped in specified map
@@ -2055,290 +1594,6 @@ pmap_remove_some_phys(
 
 }
 
-/*
- *     Remove the given range of addresses
- *     from the specified map.
- *
- *     It is assumed that the start and end are properly
- *     rounded to the hardware page size.
- */
-
-
-void
-pmap_remove(
-       pmap_t          map,
-       addr64_t        s64,
-       addr64_t        e64)
-{
-       pt_entry_t      *pde;
-       pt_entry_t      *spte, *epte;
-       addr64_t        l64;
-       addr64_t        orig_s64;
-       uint64_t        deadline;
-
-       pmap_intr_assert();
-
-       if (map == PMAP_NULL || s64 == e64)
-               return;
-       PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
-                  (int) map,
-                  (int) (s64>>32), (int) s64,
-                  (int) (e64>>32), (int) e64);
-
-       PMAP_LOCK(map);
-
-#if 0
-       /*
-        * Check that address range in the kernel does not overlap the stacks.
-        * We initialize local static min/max variables once to avoid making
-        * 2 function calls for every remove. Note also that these functions
-        * both return 0 before kernel stacks have been initialized, and hence
-        * the panic is not triggered in this case.
-        */
-       if (map == kernel_pmap) {
-               static vm_offset_t      kernel_stack_min = 0;
-               static vm_offset_t      kernel_stack_max = 0;
-
-               if (kernel_stack_min == 0) {
-                       kernel_stack_min = min_valid_stack_address();
-                       kernel_stack_max = max_valid_stack_address();
-               }
-               if  ((kernel_stack_min <= s64 && s64 <  kernel_stack_max) ||
-                    (kernel_stack_min <  e64 && e64 <= kernel_stack_max))
-                       panic("pmap_remove() attempted in kernel stack");
-       }
-#else
-
-       /*
-        * The values of kernel_stack_min and kernel_stack_max are no longer
-        * relevant now that we allocate kernel stacks anywhere in the kernel map,
-        * so the old code above no longer applies.  If we wanted to check that
-        * we weren't removing a mapping of a page in a kernel stack we'd have to
-        * mark the PTE with an unused bit and check that here.
-        */
-
-#endif
-
-       deadline = rdtsc64() + max_preemption_latency_tsc;
-
-       orig_s64 = s64;
-
-       while (s64 < e64) {
-           l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size-1);
-           if (l64 > e64)
-               l64 = e64;
-           pde = pmap_pde(map, s64);
-
-           if (pde && (*pde & INTEL_PTE_VALID)) {
-               spte = (pt_entry_t *)pmap_pte(map, (s64 & ~(pde_mapped_size-1)));
-               spte = &spte[ptenum(s64)];
-               epte = &spte[intel_btop(l64-s64)];
-
-               pmap_remove_range(map, s64, spte, epte);
-           }
-           s64 = l64;
-           pde++;
-
-           if (s64 < e64 && rdtsc64() >= deadline) {
-             PMAP_UNLOCK(map)
-               PMAP_LOCK(map)
-
-             deadline = rdtsc64() + max_preemption_latency_tsc;
-           }
-
-       }
-
-       PMAP_UNLOCK(map);
-
-       PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END,
-                  (int) map, 0, 0, 0, 0);
-
-}
-
-/*
- *     Routine:        pmap_page_protect
- *
- *     Function:
- *             Lower the permission for all mappings to a given
- *             page.
- */
-void
-pmap_page_protect(
-        ppnum_t         pn,
-       vm_prot_t       prot)
-{
-       pv_hashed_entry_t               pvh_eh = PV_HASHED_ENTRY_NULL;
-       pv_hashed_entry_t               pvh_et = PV_HASHED_ENTRY_NULL;
-       pv_hashed_entry_t       nexth;
-       int                     pvh_cnt = 0;
-       pv_rooted_entry_t               pv_h;
-       pv_rooted_entry_t               pv_e;
-       pv_hashed_entry_t       pvh_e;
-       pt_entry_t              *pte;
-       int                     pai;
-       register pmap_t         pmap;
-       boolean_t               remove;
-       int                     pvhash_idx;
-
-       pmap_intr_assert();
-       assert(pn != vm_page_fictitious_addr);
-       if (pn == vm_page_guard_addr)
-               return;
-
-       pai = ppn_to_pai(pn);
-
-       if (!managed_page(pai)) {
-           /*
-            *  Not a managed page.
-            */
-           return;
-       }
-
-       PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START,
-                  (int) pn, (int) prot, 0, 0, 0);
-
-       /*
-        * Determine the new protection.
-        */
-       switch (prot) {
-           case VM_PROT_READ:
-           case VM_PROT_READ|VM_PROT_EXECUTE:
-               remove = FALSE;
-               break;
-           case VM_PROT_ALL:
-               return; /* nothing to do */
-           default:
-               remove = TRUE;
-               break;
-       }
-
-       pv_h = pai_to_pvh(pai);
-
-       LOCK_PVH(pai);
-
-
-       /*
-        * Walk down PV list, changing or removing all mappings.
-        */
-       if (pv_h->pmap != PMAP_NULL) {
-
-           pv_e = pv_h;
-           pvh_e = (pv_hashed_entry_t)pv_e; /* cheat */
-
-           do {
-               register vm_map_offset_t vaddr;
-               pmap = pv_e->pmap;
-
-               vaddr = pv_e->va;
-               pte = pmap_pte(pmap, vaddr);
-               
-               if (0 == pte) {
-                       panic("pmap_page_protect: Missing PTE, pmap: %p, pn: 0x%x vaddr: 0x%llx, prot: %d kernel_pmap: %p", pmap, pn, vaddr, prot, kernel_pmap);
-               }
-
-               nexth = (pv_hashed_entry_t)queue_next(&pvh_e->qlink);  /* if there is one */
-
-               /*
-                * Remove the mapping if new protection is NONE
-                * or if write-protecting a kernel mapping.
-                */
-               if (remove || pmap == kernel_pmap) {
-                   /*
-                    * Remove the mapping, collecting any modify bits.
-                    */
-                   pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_VALID));
-
-                   PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
-
-                   pmap_phys_attributes[pai] |= *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
-
-                   pmap_store_pte(pte, 0);
-
-#if TESTING
-                   if (pmap->stats.resident_count < 1)
-                       panic("pmap_page_protect: resident_count");
-#endif
-                   assert(pmap->stats.resident_count >= 1);
-                   OSAddAtomic(-1,  &pmap->stats.resident_count);
-
-                   /*
-                    * Deal with the pv_rooted_entry.
-                    */
-
-                   if (pv_e == pv_h) {
-                       /*
-                        * Fix up head later.
-                        */
-                       pv_h->pmap = PMAP_NULL;
-                   }
-                   else {
-                       /*
-                        * Delete this entry.
-                        */
-                     CHK_NPVHASH();
-                     pvhash_idx = pvhashidx(pvh_e->pmap,pvh_e->va);
-                     LOCK_PV_HASH(pvhash_idx);
-                     remque(&pvh_e->qlink);
-                     pmap_pvh_unlink(pvh_e);
-                     UNLOCK_PV_HASH(pvhash_idx);
-
-                     pvh_e->qlink.next = (queue_entry_t)pvh_eh;
-                       pvh_eh = pvh_e;
-
-                       if (pvh_et == PV_HASHED_ENTRY_NULL)
-                           pvh_et = pvh_e;
-                       pvh_cnt++;
-                   }
-               } else {
-                   /*
-                    * Write-protect.
-                    */
-                   pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_WRITE));
-                   PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
-               }
-
-               pvh_e = nexth;
-           } while ((pv_e = (pv_rooted_entry_t)nexth) != pv_h);
-
-
-           /*
-            * If pv_head mapping was removed, fix it up.
-            */
-
-           if (pv_h->pmap == PMAP_NULL) {
-             pvh_e = (pv_hashed_entry_t)queue_next(&pv_h->qlink);
-
-             if (pvh_e != (pv_hashed_entry_t)pv_h) {
-               CHK_NPVHASH();
-               pvhash_idx = pvhashidx(pvh_e->pmap,pvh_e->va);
-               LOCK_PV_HASH(pvhash_idx);
-               remque(&pvh_e->qlink);
-               pmap_pvh_unlink(pvh_e);
-               UNLOCK_PV_HASH(pvhash_idx);
-                 pv_h->pmap = pvh_e->pmap;
-                 pv_h->va = pvh_e->va;
-                 pvh_e->qlink.next = (queue_entry_t)pvh_eh;
-                   pvh_eh = pvh_e;
-
-                   if (pvh_et == PV_HASHED_ENTRY_NULL)
-                       pvh_et = pvh_e;
-                   pvh_cnt++;
-               }
-           }
-       }
-       if (pvh_eh != PV_HASHED_ENTRY_NULL) {
-           PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
-       }
-
-       UNLOCK_PVH(pai);
-
-       PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END,
-                  0, 0, 0, 0, 0);
-
-}
-
-
 /*
  *     Routine:
  *             pmap_disconnect
@@ -2459,427 +1714,6 @@ pmap_map_block(
 }
 
 
-/*
- *     Insert the given physical page (p) at
- *     the specified virtual address (v) in the
- *     target physical map with the protection requested.
- *
- *     If specified, the page will be wired down, meaning
- *     that the related pte cannot be reclaimed.
- *
- *     NB:  This is the only routine which MAY NOT lazy-evaluate
- *     or lose information.  That is, this routine must actually
- *     insert this page into the given map NOW.
- */
-void
-pmap_enter(
-       register pmap_t         pmap,
-       vm_map_offset_t         vaddr,
-       ppnum_t                 pn,
-       vm_prot_t               prot,
-       unsigned int            flags,
-       boolean_t               wired)
-{
-       register pt_entry_t     *pte;
-       register pv_rooted_entry_t      pv_h;
-       register int            pai;
-       pv_hashed_entry_t               pvh_e;
-       pv_hashed_entry_t               pvh_new;
-       pv_hashed_entry_t       *hashp;
-       pt_entry_t              template;
-       pmap_paddr_t            old_pa;
-       pmap_paddr_t             pa = (pmap_paddr_t)i386_ptob(pn);
-       boolean_t               need_tlbflush = FALSE;
-       boolean_t               set_NX;
-       char                    oattr;
-       int                     pvhash_idx;
-       uint32_t                pv_cnt;
-       boolean_t               old_pa_locked;
-
-       pmap_intr_assert();
-       assert(pn != vm_page_fictitious_addr);
-       if (pmap_debug)
-               printf("pmap(%qx, %x)\n", vaddr, pn);
-       if (pmap == PMAP_NULL)
-               return;
-       if (pn == vm_page_guard_addr)
-               return;
-
-       PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
-                  (int) pmap,
-                  (int) (vaddr>>32), (int) vaddr,
-                  (int) pn, prot);
-
-       if ( (prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled )
-               set_NX = FALSE;
-       else
-               set_NX = TRUE;
-       
-       /*
-        *      Must allocate a new pvlist entry while we're unlocked;
-        *      zalloc may cause pageout (which will lock the pmap system).
-        *      If we determine we need a pvlist entry, we will unlock
-        *      and allocate one.  Then we will retry, throwing away
-        *      the allocated entry later (if we no longer need it).
-        */
-
-       pvh_new = PV_HASHED_ENTRY_NULL;
-Retry:
-       pvh_e = PV_HASHED_ENTRY_NULL;
-
-       PMAP_LOCK(pmap);
-
-       /*
-        *      Expand pmap to include this pte.  Assume that
-        *      pmap is always expanded to include enough hardware
-        *      pages to map one VM page.
-        */
-
-       while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) {
-               /*
-                *      Must unlock to expand the pmap.
-                */
-               PMAP_UNLOCK(pmap);
-               pmap_expand(pmap, vaddr); /* going to grow pde level page(s) */
-               PMAP_LOCK(pmap);
-       }
-
-       old_pa = pte_to_pa(*pte);
-       pai = pa_index(old_pa);
-       old_pa_locked = FALSE;
-
-       /*
-        * if we have a previous managed page, lock the pv entry now. after
-        * we lock it, check to see if someone beat us to the lock and if so
-        * drop the lock
-        */
-
-       if ((0 != old_pa) && managed_page(pai)) {
-         LOCK_PVH(pai);
-         old_pa_locked = TRUE;
-         old_pa = pte_to_pa(*pte);
-         if (0 == old_pa) {
-           UNLOCK_PVH(pai);  /* some other path beat us to it */
-           old_pa_locked = FALSE;
-         }
-       }
-
-
-       /*
-        *      Special case if the incoming physical page is already mapped
-        *      at this address.
-        */
-       if (old_pa == pa) {
-
-           /*
-            *  May be changing its wired attribute or protection
-            */
-
-           template = pa_to_pte(pa) | INTEL_PTE_VALID;
-
-           if(VM_MEM_NOT_CACHEABLE == (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT))) {
-               if(!(flags & VM_MEM_GUARDED))
-                       template |= INTEL_PTE_PTA;
-               template |= INTEL_PTE_NCACHE;
-           }
-
-           if (pmap != kernel_pmap)
-               template |= INTEL_PTE_USER;
-           if (prot & VM_PROT_WRITE)
-               template |= INTEL_PTE_WRITE;
-
-           if (set_NX == TRUE)
-               template |= INTEL_PTE_NX;
-
-           if (wired) {
-               template |= INTEL_PTE_WIRED;
-               if (!iswired(*pte))
-                   OSAddAtomic(+1,  &pmap->stats.wired_count);
-           }
-           else {
-               if (iswired(*pte)) {
-                   assert(pmap->stats.wired_count >= 1);
-                   OSAddAtomic(-1,  &pmap->stats.wired_count);
-               }
-           }
-
-           /* store modified PTE and preserve RC bits */ 
-           pmap_update_pte(pte, *pte, template | (*pte & (INTEL_PTE_REF | INTEL_PTE_MOD)));
-           if (old_pa_locked) {
-             UNLOCK_PVH(pai);
-             old_pa_locked = FALSE;
-           }
-           need_tlbflush = TRUE;
-           goto Done;
-       }
-
-       /*
-        *      Outline of code from here:
-        *         1) If va was mapped, update TLBs, remove the mapping
-        *            and remove old pvlist entry.
-        *         2) Add pvlist entry for new mapping
-        *         3) Enter new mapping.
-        *
-        *      If the old physical page is not managed step 1) is skipped
-        *      (except for updating the TLBs), and the mapping is
-        *      overwritten at step 3).  If the new physical page is not
-        *      managed, step 2) is skipped.
-        */
-
-       if (old_pa != (pmap_paddr_t) 0) {
-
-           /*
-            *  Don't do anything to pages outside valid memory here.
-            *  Instead convince the code that enters a new mapping
-            *  to overwrite the old one.
-            */
-
-           /* invalidate the PTE */ 
-           pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_VALID));
-           /* propagate invalidate everywhere */
-           PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
-           /* remember reference and change */
-           oattr = (char)(*pte & (PHYS_MODIFIED | PHYS_REFERENCED));
-           /* completely invalidate the PTE */
-           pmap_store_pte(pte, 0);
-
-           if (managed_page(pai)) {
-#if TESTING
-               if (pmap->stats.resident_count < 1)
-                   panic("pmap_enter: resident_count");
-#endif
-               assert(pmap->stats.resident_count >= 1);
-               OSAddAtomic(-1,  &pmap->stats.resident_count);
-
-               if (iswired(*pte)) {
-
-#if TESTING
-                   if (pmap->stats.wired_count < 1)
-                       panic("pmap_enter: wired_count");
-#endif
-                   assert(pmap->stats.wired_count >= 1);
-                   OSAddAtomic(-1,  &pmap->stats.wired_count);
-               }
-
-               pmap_phys_attributes[pai] |= oattr;
-               /*
-                *      Remove the mapping from the pvlist for
-                *      this physical page.
-                *      We'll end up with either a rooted pv or a
-                *      hashed pv
-                */
-               {
-
-                   pv_h = pai_to_pvh(pai);
-
-                   if (pv_h->pmap == PMAP_NULL) {
-                       panic("pmap_enter: null pv_list!");
-                   }
-
-                   if (pv_h->va == vaddr && pv_h->pmap == pmap) {
-                       /*
-                        * Header is the pv_rooted_entry.  
-                        * If there is a next one, copy it to the
-                        * header and free the next one (we cannot
-                        * free the header)
-                        */
-                     pvh_e = (pv_hashed_entry_t)queue_next(&pv_h->qlink);
-                     if (pvh_e != (pv_hashed_entry_t)pv_h) {
-                       pvhash_idx = pvhashidx(pvh_e->pmap, pvh_e->va);
-                       LOCK_PV_HASH(pvhash_idx);
-                         remque(&pvh_e->qlink);
-                         pmap_pvh_unlink(pvh_e);
-                         UNLOCK_PV_HASH(pvhash_idx);
-                         pv_h->pmap = pvh_e->pmap;
-                         pv_h->va = pvh_e->va;
-                       }
-                     else {
-                       pv_h->pmap = PMAP_NULL;
-                       pvh_e = PV_HASHED_ENTRY_NULL;
-                     }
-                   }
-                   else {
-                     pv_hashed_entry_t *pprevh;
-                     ppnum_t old_ppn;
-                     /* wasn't the rooted pv - hash, find it, and unlink it */
-                     old_ppn = (ppnum_t)pa_index(old_pa);
-                     CHK_NPVHASH();
-                     pvhash_idx = pvhashidx(pmap,vaddr);
-                     LOCK_PV_HASH(pvhash_idx);
-                     pprevh = pvhash(pvhash_idx);
-#if PV_DEBUG
-                     if (NULL==pprevh)panic("pmap enter 1");
-#endif
-                     pvh_e = *pprevh;
-                     pmap_pv_hashlist_walks++;
-                     pv_cnt = 0;
-                     while (PV_HASHED_ENTRY_NULL != pvh_e) {
-                       pv_cnt++;
-                       if (pvh_e->pmap == pmap && pvh_e->va == vaddr && pvh_e->ppn == old_ppn) break;
-                       pprevh = &pvh_e->nexth;
-                       pvh_e = pvh_e->nexth;
-                     }
-                     pmap_pv_hashlist_cnts += pv_cnt;
-                     if (pmap_pv_hashlist_max < pv_cnt) pmap_pv_hashlist_max = pv_cnt;
-                     if (PV_HASHED_ENTRY_NULL == pvh_e) panic("pmap_enter: pv not in hash list");
-                     if(NULL==pprevh)panic("pmap enter 2");
-                     *pprevh = pvh_e->nexth;
-                     remque(&pvh_e->qlink);
-                     UNLOCK_PV_HASH(pvhash_idx);
-                   }
-               }
-           }
-           else {
-               /*
-                *      old_pa is not managed.
-                *      Do removal part of accounting.
-                */
-
-               if (iswired(*pte)) {
-                   assert(pmap->stats.wired_count >= 1);
-                   OSAddAtomic(-1,  &pmap->stats.wired_count);
-               }
-           }
-       }
-
-       /*
-        * if we had a previously managed paged locked, unlock it now
-        */
-
-       if (old_pa_locked) {
-         UNLOCK_PVH(pai);
-         old_pa_locked = FALSE;
-       }
-
-       pai = pa_index(pa);     /* now working with new incoming phys page */
-       if (managed_page(pai)) {
-
-           /*
-            *  Step 2) Enter the mapping in the PV list for this
-            *  physical page.
-            */
-           pv_h = pai_to_pvh(pai);
-
-           LOCK_PVH(pai);
-
-           if (pv_h->pmap == PMAP_NULL) {
-               /*
-                *      No mappings yet, use  rooted pv
-                */
-               pv_h->va = vaddr;
-               pv_h->pmap = pmap;
-               queue_init(&pv_h->qlink);
-           }
-           else {
-               /*
-                *      Add new pv_hashed_entry after header.
-                */
-               if ((PV_HASHED_ENTRY_NULL == pvh_e) && pvh_new) {
-                 pvh_e = pvh_new;
-                 pvh_new = PV_HASHED_ENTRY_NULL;  /* show we used it */
-               } else if (PV_HASHED_ENTRY_NULL == pvh_e) {
-                 PV_HASHED_ALLOC(pvh_e);
-                 if (PV_HASHED_ENTRY_NULL == pvh_e) {
-                   /* the pv list is empty.
-                    * if we are on the kernel pmap we'll use one of the special private
-                    * kernel pv_e's, else, we need to unlock everything, zalloc a pv_e,
-                    * and restart bringing in the pv_e with us.
-                    */
-                   if (kernel_pmap == pmap) {
-                     PV_HASHED_KERN_ALLOC(pvh_e);
-                   } else {
-                     UNLOCK_PVH(pai);
-                     PMAP_UNLOCK(pmap);
-                     pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
-                     goto Retry;
-                   }
-                 }
-               }
-
-               if (PV_HASHED_ENTRY_NULL == pvh_e) panic("pvh_e exhaustion");
-               pvh_e->va = vaddr;
-               pvh_e->pmap = pmap;
-               pvh_e->ppn = pn;
-               CHK_NPVHASH();
-               pvhash_idx = pvhashidx(pmap,vaddr);
-               LOCK_PV_HASH(pvhash_idx);
-               insque(&pvh_e->qlink, &pv_h->qlink);
-               hashp = pvhash(pvhash_idx);
-#if PV_DEBUG
-               if(NULL==hashp)panic("pmap_enter 4");
-#endif
-               pvh_e->nexth = *hashp;
-               *hashp = pvh_e;
-               UNLOCK_PV_HASH(pvhash_idx);
-
-               /*
-                *      Remember that we used the pvlist entry.
-                */
-               pvh_e = PV_HASHED_ENTRY_NULL;
-           }
-
-           /*
-            * only count the mapping
-            * for 'managed memory'
-            */
-           OSAddAtomic(+1,  &pmap->stats.resident_count);
-           if (pmap->stats.resident_count > pmap->stats.resident_max) {
-                   pmap->stats.resident_max = pmap->stats.resident_count;
-           }
-       }
-
-       /*
-        * Step 3) Enter the mapping.
-        *
-        *      Build a template to speed up entering -
-        *      only the pfn changes.
-        */
-       template = pa_to_pte(pa) | INTEL_PTE_VALID;
-
-       if (flags & VM_MEM_NOT_CACHEABLE) {
-               if(!(flags & VM_MEM_GUARDED))
-                       template |= INTEL_PTE_PTA;
-               template |= INTEL_PTE_NCACHE;
-       }
-
-       if (pmap != kernel_pmap)
-               template |= INTEL_PTE_USER;
-       if (prot & VM_PROT_WRITE)
-               template |= INTEL_PTE_WRITE;
-
-       if (set_NX == TRUE)
-               template |= INTEL_PTE_NX;
-
-       if (wired) {
-               template |= INTEL_PTE_WIRED;
-               OSAddAtomic(+1,  &pmap->stats.wired_count);
-       }
-       pmap_store_pte(pte, template);
-
-       /* if this was a managed page we delayed unlocking the pv until here
-        * to prevent pmap_page_protect et al from finding it until the pte
-        * has been stored */
-
-       if (managed_page(pai)) {
-         UNLOCK_PVH(pai);
-       }
-
-Done:
-       if (need_tlbflush == TRUE)
-               PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
-
-       if (pvh_e != PV_HASHED_ENTRY_NULL) {
-               PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1);
-       }
-
-       if (pvh_new != PV_HASHED_ENTRY_NULL) {
-         PV_HASHED_KERN_FREE_LIST(pvh_new, pvh_new, 1);
-       }
-
-       PMAP_UNLOCK(pmap);
-       PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, 0, 0, 0, 0, 0);
-}
-
 /*
  *     Routine:        pmap_change_wiring
  *     Function:       Change the wiring attribute for a map/virtual-address
@@ -3917,95 +2751,6 @@ phys_page_exists(
        return TRUE;
 }
 
-void
-mapping_free_prime(void)
-{
-       int             i;
-       pv_hashed_entry_t      pvh_e;
-       pv_hashed_entry_t      pvh_eh;
-       pv_hashed_entry_t      pvh_et;
-       int             pv_cnt;
-
-       pv_cnt = 0;
-       pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
-       for (i = 0; i < (5 * PV_HASHED_ALLOC_CHUNK); i++) {
-               pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
-
-               pvh_e->qlink.next = (queue_entry_t)pvh_eh;
-               pvh_eh = pvh_e;
-
-               if (pvh_et == PV_HASHED_ENTRY_NULL)
-                       pvh_et = pvh_e;
-               pv_cnt++;
-       }
-       PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
-
-       pv_cnt = 0;
-       pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
-       for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK; i++) {
-               pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
-
-               pvh_e->qlink.next = (queue_entry_t)pvh_eh;
-               pvh_eh = pvh_e;
-
-               if (pvh_et == PV_HASHED_ENTRY_NULL)
-                       pvh_et = pvh_e;
-               pv_cnt++;
-       }
-       PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
-
-}
-
-void
-mapping_adjust(void)
-{
-       pv_hashed_entry_t      pvh_e;
-       pv_hashed_entry_t      pvh_eh;
-       pv_hashed_entry_t      pvh_et;
-       int             pv_cnt;
-       int             i;
-
-       if (mapping_adjust_call == NULL) {
-               thread_call_setup(&mapping_adjust_call_data,
-                                 (thread_call_func_t) mapping_adjust,
-                                 (thread_call_param_t) NULL);
-               mapping_adjust_call = &mapping_adjust_call_data;
-       }
-
-       pv_cnt = 0;
-       pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
-       if (pv_hashed_kern_free_count < PV_HASHED_KERN_LOW_WATER_MARK) {
-               for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK; i++) {
-                       pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
-
-                       pvh_e->qlink.next = (queue_entry_t)pvh_eh;
-                       pvh_eh = pvh_e;
-
-                       if (pvh_et == PV_HASHED_ENTRY_NULL)
-                               pvh_et = pvh_e;
-                       pv_cnt++;
-               }
-               PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
-       }
-
-       pv_cnt = 0;
-       pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
-       if (pv_hashed_free_count < PV_HASHED_LOW_WATER_MARK) {
-               for (i = 0; i < PV_HASHED_ALLOC_CHUNK; i++) {
-                       pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
-
-                       pvh_e->qlink.next = (queue_entry_t)pvh_eh;
-                       pvh_eh = pvh_e;
-
-                       if (pvh_et == PV_HASHED_ENTRY_NULL)
-                               pvh_et = pvh_e;
-                       pv_cnt++;
-               }
-               PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
-       }
-       mappingrecurse = 0;
-}
-
 void
 pmap_commpage32_init(vm_offset_t kernel_commpage, vm_offset_t user_commpage, int cnt)
 {
index 9e6d65d20b9e12c5091b0717bf1b46f336cc8297..0acf265d2ab0a2b24893f64092c9ff073dd744d8 100644 (file)
@@ -432,7 +432,8 @@ enum  high_fixed_addresses {
 #define INTEL_PTE_NX           (1ULL << 63)
 
 #define INTEL_PTE_INVALID       0
-
+/* This is conservative, but suffices */
+#define INTEL_PTE_RSVD         ((1ULL << 8) | (1ULL << 9) | (1ULL << 10) | (1ULL << 11) | (0x1FFULL << 54))
 #define        pa_to_pte(a)            ((a) & INTEL_PTE_PFN) /* XXX */
 #define        pte_to_pa(p)            ((p) & INTEL_PTE_PFN) /* XXX */
 #define        pte_increment_pa(p)     ((p) += INTEL_OFFMASK+1)
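The new INTEL_PTE_RSVD mask covers bits 8-11 and bits 54-62; per its comment the choice is conservative but sufficient. A hedged sketch of the kind of predicate it enables (the helper name is hypothetical, not part of this change):

	static inline boolean_t
	pte_has_reserved_bits(pt_entry_t pte)
	{
		/* any of bits 8-11 or 54-62 set: treat the PTE as malformed */
		return ((pte & INTEL_PTE_RSVD) != 0);
	}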
@@ -704,7 +705,7 @@ extern vm_offset_t pmap_high_shared_remap(enum high_fixed_addresses, vm_offset_t
 #endif
 
 extern void pt_fake_zone_info(int *, vm_size_t *, vm_size_t *, vm_size_t *, vm_size_t *, int *, int *);
-
+extern void pmap_pagetable_corruption_msg_log(int (*)(const char * fmt, ...)__printflike(1,2));
 
 
 /*
index 1a1105399083b9351b3c791a8b04530c476d9146..04f4aa0081c1d721eb3e4797d7951b74e54dc4df 100644 (file)
@@ -28,6 +28,7 @@
 
 #include <vm/pmap.h>
 #include <sys/kdebug.h>
+#include <kern/debug.h>
 
 #ifdef MACH_KERNEL_PRIVATE
 
@@ -43,7 +44,6 @@
        simple_unlock(&(pmap)->lock);           \
 }
 
-extern void pmap_flush_tlbs(pmap_t pmap);
 
 #define PMAP_UPDATE_TLBS(pmap, s, e)                                   \
 	pmap_flush_tlbs(pmap)
@@ -67,10 +67,698 @@ void               pmap_expand_pml4(
 void           pmap_expand_pdpt(
                        pmap_t          map,
                        vm_map_offset_t v);
+extern void    pmap_flush_tlbs(pmap_t pmap);
+
 #if    defined(__x86_64__)
 extern const boolean_t cpu_64bit;
 #else
 extern boolean_t cpu_64bit;
 #endif
 
+/*
+ *     Private data structures.
+ */
+
+/*
+ *     For each vm_page_t, there is a list of all currently
+ *     valid virtual mappings of that page.  An entry is
+ *     a pv_rooted_entry_t; the list is the pv_table.
+ *
+ *      N.B.  with the new combo rooted/hashed scheme it is
+ *      only possible to remove individual non-rooted entries
+ *      if they are found via the hashed chains as there is no
+ *      way to unlink the singly linked hashed entries if navigated to
+ *      via the queue list off the rooted entries.  Think of it as
+ *      hash/walk/pull, keeping track of the prev pointer while walking
+ *      the singly linked hash list.  All of this is to save memory and
+ *      keep both types of pv_entries as small as possible.
+ */
+
+/*
+
+PV HASHING Changes - JK 1/2007
+
+Pve's establish physical to virtual mappings.  These are used for aliasing of a 
+physical page to (potentially many) virtual addresses within pmaps. In the previous 
+implementation the structure of the pv_entries (each 16 bytes in size) was
+
+typedef struct pv_entry {
+    struct pv_entry      *next;
+    pmap_t                    pmap;
+    vm_map_offset_t   va;
+} *pv_entry_t;
+
+An initial array of these is created at boot time, one per physical page of memory, 
+indexed by the physical page number. Additionally, a pool of entries is created from a 
+pv_zone to be used as needed by pmap_enter() when it is creating new mappings.  
+Originally, we kept this pool around because the code in pmap_enter() was unable to 
+block if it needed an entry and none were available - we'd panic.  Some time ago I 
+restructured the pmap_enter() code so that for user pmaps it can block while zalloc'ing 
+a pv structure and restart, removing a panic from the code (in the case of the kernel 
+pmap we cannot block and still panic, so we keep a separate hot pool for use only on 
+kernel pmaps).  The pool has not been removed since there is a large performance gain 
+keeping freed pv's around for reuse and not suffering the overhead of zalloc for every new pv we need.
+
+As pmap_enter() created new mappings it linked the new pve's for them off the fixed 
+pv array for that ppn (off the next pointer).  These pve's are accessed for several 
+operations, one of them being address space teardown.  In that case, we basically do this
+
+       for (every page/pte in the space) {
+               calc pve_ptr from the ppn in the pte
+               for (every pv in the list for the ppn) {
+                       if (this pv is for this pmap/vaddr) {
+                               do housekeeping
+                               unlink/free the pv
+                       }
+               }
+       }
+
+The problem arose when we were running, say 8000 (or even 2000) apache or other processes 
+and one or all terminate. The list hanging off each pv array entry could have thousands of 
+entries.  We were continuously linearly searching each of these lists as we stepped through 
+the address space we were tearing down.  Because of the locks we hold, the likely cache 
+miss for each node, and the interrupt disabling needed for MP safety, the system became 
+completely unresponsive for many seconds while we did this.
+
+Realizing that pve's are accessed in two distinct ways (linearly running the list by ppn 
+for operations like pmap_page_protect and finding and modifying/removing a single pve as 
+part of pmap_enter processing) has led to modifying the pve structures and databases.
+
+There are now two types of pve structures.  A "rooted" structure which is basically the 
+original structure accessed in an array by ppn, and a "hashed" structure accessed on a 
+hash list via a hash of [pmap, vaddr].  These have been designed with the two goals of 
+minimizing wired memory and making the lookup of a ppn faster.  Since a vast majority of 
+pages in the system are not aliased and hence represented by a single pv entry, I've kept 
+the rooted entry size as small as possible because there is one of these dedicated for 
+every physical page of memory.  The hashed pve's are larger due to the addition of the hash 
+link and the ppn entry needed for matching while running the hash list to find the entry we 
+are looking for.  This way, only systems that have lots of aliasing (like 2000+ httpd procs) 
+will pay the extra memory price. Both structures have the same first three fields allowing 
+some simplification in the code.
+
+They have these shapes
+
+typedef struct pv_rooted_entry {
+        queue_head_t qlink;
+        vm_map_offset_t va;
+        pmap_t          pmap;
+} *pv_rooted_entry_t;
+
+
+typedef struct pv_hashed_entry {
+  queue_head_t qlink;
+  vm_map_offset_t va;
+  pmap_t        pmap;
+  ppnum_t ppn;
+  struct pv_hashed_entry *nexth;
+} *pv_hashed_entry_t;
+
+The main flow difference is that the code is now aware of the rooted entry and the hashed 
+entries.  Code that runs the pv list still starts with the rooted entry and then continues 
+down the qlink onto the hashed entries.  Code that is looking up a specific pv entry first 
+checks the rooted entry and then hashes and runs the hash list for the match. The hash list 
+lengths are much smaller than the original pv lists that contained all aliases for the specific ppn.
+
+*/
+
+typedef struct pv_rooted_entry {     /* first three entries must match pv_hashed_entry_t */
+        queue_head_t qlink;
+       vm_map_offset_t va;             /* virtual address for mapping */
+       pmap_t          pmap;           /* pmap where mapping lies */
+} *pv_rooted_entry_t;
+
+#define PV_ROOTED_ENTRY_NULL   ((pv_rooted_entry_t) 0)
+
+
+typedef struct pv_hashed_entry {     /* first three entries must match pv_rooted_entry_t */
+       queue_head_t qlink;
+       vm_map_offset_t va;
+       pmap_t        pmap;
+       ppnum_t ppn;
+       struct pv_hashed_entry *nexth;
+} *pv_hashed_entry_t;
+
+#define PV_HASHED_ENTRY_NULL ((pv_hashed_entry_t)0)
+
+/* #define PV_DEBUG 1   uncomment to enable some PV debugging code */
+#ifdef PV_DEBUG
+#define CHK_NPVHASH() if(0 == npvhash) panic("npvhash uninitialized");
+#else
+#define CHK_NPVHASH()
+#endif
+
+#define NPVHASH 4095   /* MUST BE 2^N - 1 */
+#define PV_HASHED_LOW_WATER_MARK 5000
+#define PV_HASHED_KERN_LOW_WATER_MARK 400
+#define PV_HASHED_ALLOC_CHUNK 2000
+#define PV_HASHED_KERN_ALLOC_CHUNK 200
+
+#define        PV_HASHED_ALLOC(pvh_e) { \
+       simple_lock(&pv_hashed_free_list_lock); \
+       if ((pvh_e = pv_hashed_free_list) != 0) { \
+         pv_hashed_free_list = (pv_hashed_entry_t)pvh_e->qlink.next;   \
+         pv_hashed_free_count--;                                       \
+         if (pv_hashed_free_count < PV_HASHED_LOW_WATER_MARK)          \
+                 if (hw_compare_and_store(0,1,(u_int *)&mappingrecurse)) \
+                         thread_call_enter(mapping_adjust_call);       \
+       }                                                               \
+       simple_unlock(&pv_hashed_free_list_lock); \
+}
+
+#define        PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt) {   \
+       simple_lock(&pv_hashed_free_list_lock); \
+       pvh_et->qlink.next = (queue_entry_t)pv_hashed_free_list;        \
+       pv_hashed_free_list = pvh_eh; \
+       pv_hashed_free_count += pv_cnt;           \
+       simple_unlock(&pv_hashed_free_list_lock); \
+}
+
+#define        PV_HASHED_KERN_ALLOC(pvh_e) { \
+       simple_lock(&pv_hashed_kern_free_list_lock); \
+       if ((pvh_e = pv_hashed_kern_free_list) != 0) { \
+         pv_hashed_kern_free_list = (pv_hashed_entry_t)pvh_e->qlink.next;      \
+         pv_hashed_kern_free_count--;                                  \
+         if (pv_hashed_kern_free_count < PV_HASHED_KERN_LOW_WATER_MARK) \
+                 if (hw_compare_and_store(0,1,(u_int *)&mappingrecurse)) \
+                         thread_call_enter(mapping_adjust_call);       \
+       }                                                               \
+       simple_unlock(&pv_hashed_kern_free_list_lock); \
+}
+
+#define        PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt) {       \
+       simple_lock(&pv_hashed_kern_free_list_lock); \
+       pvh_et->qlink.next = (queue_entry_t)pv_hashed_kern_free_list;   \
+       pv_hashed_kern_free_list = pvh_eh; \
+       pv_hashed_kern_free_count += pv_cnt;           \
+       simple_unlock(&pv_hashed_kern_free_list_lock); \
+}
+
+/*
+ *     Index into pv_head table, its lock bits, and the modify/reference and managed bits
+ */
+
+#define pa_index(pa)           (i386_btop(pa))
+#define ppn_to_pai(ppn)                ((int)ppn)
+
+#define pai_to_pvh(pai)                (&pv_head_table[pai])
+#define lock_pvh_pai(pai)      bit_lock(pai, (void *)pv_lock_table)
+#define unlock_pvh_pai(pai)    bit_unlock(pai, (void *)pv_lock_table)
+#define pvhash(idx)            (&pv_hash_table[idx])
+
+#define lock_hash_hash(hash)   bit_lock(hash, (void *)pv_hash_lock_table)
+#define unlock_hash_hash(hash) bit_unlock(hash, (void *)pv_hash_lock_table)
+
+#define IS_MANAGED_PAGE(x)                             \
+       ((unsigned int)(x) <= last_managed_page &&      \
+        (pmap_phys_attributes[x] & PHYS_MANAGED))
+
+/*
+ *     Physical page attributes.  Copy bits from PTE definition.
+ */
+#define        PHYS_MODIFIED   INTEL_PTE_MOD   /* page modified */
+#define        PHYS_REFERENCED INTEL_PTE_REF   /* page referenced */
+#define PHYS_MANAGED   INTEL_PTE_VALID /* page is managed */
+
+/*
+ *     Amount of virtual memory mapped by one
+ *     page-directory entry.
+ */
+#define        PDE_MAPPED_SIZE         (pdetova(1))
+
+
+/*
+ *     Locking and TLB invalidation
+ */
+
+/*
+ *     Locking Protocols: (changed 2/2007 JK)
+ *
+ *     There are two structures in the pmap module that need locking:
+ *     the pmaps themselves, and the per-page pv_lists (which are locked
+ *     by locking the pv_lock_table entry that corresponds to the pv_head
+ *     for the list in question.)  Most routines want to lock a pmap and
+ *     then do operations in it that require pv_list locking -- however
+ *     pmap_remove_all and pmap_copy_on_write operate on a physical page
+ *     basis and want to do the locking in the reverse order, i.e. lock
+ *     a pv_list and then go through all the pmaps referenced by that list.
+ *
+ *      The system wide pmap lock has been removed. Now, paths take a lock
+ *      on the pmap before changing its 'shape' and the reverse order lockers
+ *      (coming in by phys ppn) take a lock on the corresponding pv and then
+ *      retest to be sure nothing changed during the window before they locked
+ *      and can then run up/down the pv lists holding the list lock. This also
+ *      lets the pmap layer run (nearly completely) interrupt enabled, unlike
+ *      previously.
+ */
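
A minimal sketch of the reverse-order (physical-page-first) path this comment
describes, using the declarations appearing below in this header; the retest
after taking the pv lock is the essential step:

	LOCK_PVH(pai);				/* per-page pv list lock */
	if (pv_head_table[pai].pmap == PMAP_NULL) {
		/* the mapping vanished in the window before we locked */
		UNLOCK_PVH(pai);
		return;
	}
	/* ...safe to run up/down the pv list holding the list lock... */
	UNLOCK_PVH(pai);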
+
+/*
+ * PV locking
+ */
+
+#define LOCK_PVH(index)        {               \
+       mp_disable_preemption();        \
+       lock_pvh_pai(index);            \
+}
+
+#define UNLOCK_PVH(index) {            \
+       unlock_pvh_pai(index);          \
+       mp_enable_preemption();         \
+}
+/*
+ * PV hash locking
+ */
+
+#define LOCK_PV_HASH(hash)         lock_hash_hash(hash)
+#define UNLOCK_PV_HASH(hash)       unlock_hash_hash(hash)
+extern uint32_t npvhash;
+extern pv_hashed_entry_t       *pv_hash_table;  /* hash lists */
+extern pv_hashed_entry_t       pv_hashed_free_list;
+extern pv_hashed_entry_t       pv_hashed_kern_free_list;
+decl_simple_lock_data(extern, pv_hashed_free_list_lock)
+decl_simple_lock_data(extern, pv_hashed_kern_free_list_lock)
+decl_simple_lock_data(extern, pv_hash_table_lock)
+
+extern zone_t          pv_hashed_list_zone;    /* zone of pv_hashed_entry structures */
+
+extern int                     pv_hashed_free_count;
+extern int                     pv_hashed_kern_free_count;
+#define pv_lock_table_size(n)  (((n)+BYTE_SIZE-1)/BYTE_SIZE)
+#define pv_hash_lock_table_size(n)  (((n)+BYTE_SIZE-1)/BYTE_SIZE)
+extern char    *pv_lock_table;         /* pointer to array of bits */
+
+extern char    *pv_hash_lock_table;
+extern pv_rooted_entry_t       pv_head_table;          /* array of entries, one
+                                                        * per page */
+extern uint64_t pde_mapped_size;
+
+extern char            *pmap_phys_attributes;
+extern unsigned int    last_managed_page;
+
+/*
+ * when spinning through pmap_remove
+ * ensure that we don't spend too much
+ * time with preemption disabled.
+ * I'm setting the current threshold
+ * to 20us
+ */
+#define MAX_PREEMPTION_LATENCY_NS 20000
+extern uint64_t max_preemption_latency_tsc;
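
The conversion from this nanosecond threshold to the TSC-tick deadline used
by pmap_remove() happens at initialization and is not part of this hunk; a
sketch of the idea, assuming the calibrated TSC frequency is available as
tscFreq (declared in osfmk/i386/tsc.h):

	/* sketch only: express the 20us cap in raw TSC ticks */
	max_preemption_latency_tsc =
	    (tscFreq * MAX_PREEMPTION_LATENCY_NS) / NSEC_PER_SEC;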
+
+/* #define DEBUGINTERRUPTS 1  uncomment to ensure pmap callers have interrupts enabled */
+#ifdef DEBUGINTERRUPTS
+#define pmap_intr_assert() {                                                   \
+       if (processor_avail_count > 1 && !ml_get_interrupts_enabled())          \
+               panic("pmap interrupt assert %s, %d",__FILE__, __LINE__);       \
+}
+#else
+#define pmap_intr_assert()
+#endif
+
+extern int             nx_enabled;
+extern unsigned int    inuse_ptepages_count;
+
+static inline uint32_t
+pvhashidx(pmap_t pmap, vm_map_offset_t va)
+{
+       return ((uint32_t)(uintptr_t)pmap ^
+               ((uint32_t)((uint64_t)va >> PAGE_SHIFT) & 0xFFFFFFFF)) &
+              npvhash;
+}
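
Note that npvhash is applied as a bit mask, so the hash table size is
expected to be a power of two with npvhash == (bucket count - 1). XORing the
pmap pointer with the virtual page number of va spreads mappings of the same
page in different pmaps, and of different pages in the same pmap, across
buckets.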
+
+/*
+ * unlinks the pv_hashed_entry_t pvh from the singly linked hash chain.
+ * properly deals with the anchor.
+ * must be called with the hash locked, does not unlock it
+ */
+
+static inline void 
+pmap_pvh_unlink(pv_hashed_entry_t pvh)
+{
+       pv_hashed_entry_t       curh;
+       pv_hashed_entry_t       *pprevh;
+       int                     pvhash_idx;
+
+       CHK_NPVHASH();
+       pvhash_idx = pvhashidx(pvh->pmap, pvh->va);
+
+       pprevh = pvhash(pvhash_idx);
+
+#if PV_DEBUG
+       if (NULL == *pprevh)
+               panic("pvh_unlink null anchor"); /* JK DEBUG */
+#endif
+       curh = *pprevh;
+
+       while (PV_HASHED_ENTRY_NULL != curh) {
+               if (pvh == curh)
+                       break;
+               pprevh = &curh->nexth;
+               curh = curh->nexth;
+       }
+       if (PV_HASHED_ENTRY_NULL == curh) panic("pmap_pvh_unlink no pvh");
+       *pprevh = pvh->nexth;
+       return;
+}
+
+static inline void
+pv_hash_add(pv_hashed_entry_t  pvh_e,
+           pv_rooted_entry_t   pv_h)
+{
+       pv_hashed_entry_t       *hashp;
+       int                     pvhash_idx;
+
+       CHK_NPVHASH();
+       pvhash_idx = pvhashidx(pvh_e->pmap, pvh_e->va);
+       LOCK_PV_HASH(pvhash_idx);
+       insque(&pvh_e->qlink, &pv_h->qlink);
+       hashp = pvhash(pvhash_idx);
+#if PV_DEBUG
+       if (NULL==hashp)
+               panic("pv_hash_add(%p) null hash bucket", pvh_e);
+#endif
+       pvh_e->nexth = *hashp;
+       *hashp = pvh_e;
+       UNLOCK_PV_HASH(pvhash_idx);
+}
+
+static inline void
+pv_hash_remove(pv_hashed_entry_t pvh_e)
+{
+       int                     pvhash_idx;
+
+       CHK_NPVHASH();
+       pvhash_idx = pvhashidx(pvh_e->pmap,pvh_e->va);
+       LOCK_PV_HASH(pvhash_idx);
+       remque(&pvh_e->qlink);
+       pmap_pvh_unlink(pvh_e);
+       UNLOCK_PV_HASH(pvhash_idx);
+}
+
+static inline boolean_t popcnt1(uint64_t distance) {
+       return ((distance & (distance - 1)) == 0);
+}
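
Despite its name, popcnt1() is also TRUE for zero: x & (x - 1) clears the
lowest set bit, so the test passes exactly when x has at most one bit set,
e.g. popcnt1(0x40) is TRUE while popcnt1(0x41) is FALSE. The callers below
apply it to XORed pairs of values to detect single-bit flips.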
+
+/*
+ * Routines to handle suppression of/recovery from some forms of pagetable corruption
+ * incidents observed in the field. These can be either software induced (wild
+ * stores to the mapwindows where applicable, use after free errors
+ * (typically of pages addressed physically), mis-directed DMAs etc., or due
+ * to DRAM/memory hierarchy/interconnect errors. Given the theoretical rarity of these errors,
+ * the recording mechanism is deliberately not MP-safe. The overarching goal is to
+ * still assert on potential software races, but attempt recovery from incidents
+ * identifiable as occurring due to issues beyond the control of the pmap module.
+ * The latter includes single-bit errors and malformed pagetable entries.
+ * We currently limit ourselves to recovery/suppression of one incident per
+ * PMAP_PAGETABLE_CORRUPTION_INTERVAL seconds, and details of the incident
+ * are logged.
+ * Assertions are not suppressed if kernel debugging is enabled. (DRK 09)
+ */
+
+typedef enum {
+       PTE_VALID               = 0x0,
+       PTE_INVALID             = 0x1,
+       PTE_RSVD                = 0x2,
+       PTE_SUPERVISOR          = 0x4,
+       PTE_BITFLIP             = 0x8,
+       PV_BITFLIP              = 0x10,
+       PTE_INVALID_CACHEABILITY = 0x20
+} pmap_pagetable_corruption_t;
+
+typedef enum {
+       ROOT_PRESENT = 0,
+       ROOT_ABSENT = 1
+} pmap_pv_assertion_t;
+
+typedef enum {
+       PMAP_ACTION_IGNORE      = 0x0,
+       PMAP_ACTION_ASSERT      = 0x1,
+       PMAP_ACTION_RETRY       = 0x2,
+       PMAP_ACTION_RETRY_RELOCK = 0x4
+} pmap_pagetable_corruption_action_t;
+
+#define        PMAP_PAGETABLE_CORRUPTION_INTERVAL (6ULL * 3600ULL)
+extern uint64_t pmap_pagetable_corruption_interval_abstime;
+
+extern uint32_t pmap_pagetable_corruption_incidents;
+#define PMAP_PAGETABLE_CORRUPTION_MAX_LOG (8)
+typedef struct {
+       pmap_pv_assertion_t incident;
+       pmap_pagetable_corruption_t reason;
+       pmap_pagetable_corruption_action_t action;
+       pmap_t  pmap;
+       vm_map_offset_t vaddr;
+       pt_entry_t pte;
+       ppnum_t ppn;
+       pmap_t pvpmap;
+       vm_map_offset_t pvva;
+       uint64_t abstime;
+} pmap_pagetable_corruption_record_t;
+
+extern pmap_pagetable_corruption_record_t pmap_pagetable_corruption_records[];
+extern uint64_t pmap_pagetable_corruption_last_abstime;
+extern thread_call_t   pmap_pagetable_corruption_log_call;
+extern boolean_t pmap_pagetable_corruption_timeout;
+
+static inline void
+pmap_pagetable_corruption_log(pmap_pv_assertion_t incident, pmap_pagetable_corruption_t suppress_reason, pmap_pagetable_corruption_action_t action, pmap_t pmap, vm_map_offset_t vaddr, pt_entry_t *ptep, ppnum_t ppn, pmap_t pvpmap, vm_map_offset_t pvva) {
+       uint32_t pmap_pagetable_corruption_log_index;
+       pmap_pagetable_corruption_log_index = pmap_pagetable_corruption_incidents++ % PMAP_PAGETABLE_CORRUPTION_MAX_LOG;
+       pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].incident = incident;
+       pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].reason = suppress_reason;
+       pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].action = action;
+       pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pmap = pmap;
+       pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].vaddr = vaddr;
+       pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pte = *ptep;
+       pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].ppn = ppn;
+       pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pvpmap = pvpmap;
+       pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pvva = pvva;
+       pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].abstime = mach_absolute_time();
+       /* Asynchronously log */
+       thread_call_enter(pmap_pagetable_corruption_log_call);
+}
+
+static inline pmap_pagetable_corruption_action_t
+pmap_classify_pagetable_corruption(pmap_t pmap, vm_map_offset_t vaddr, ppnum_t *ppnp, pt_entry_t *ptep, pmap_pv_assertion_t incident) {
+       pmap_pagetable_corruption_action_t      action = PMAP_ACTION_ASSERT;
+       pmap_pagetable_corruption_t     suppress_reason = PTE_VALID;
+       ppnum_t                 suppress_ppn = 0;
+       pt_entry_t cpte = *ptep;
+       ppnum_t cpn = pa_index(pte_to_pa(cpte));
+       ppnum_t ppn = *ppnp;
+       pv_rooted_entry_t       pv_h = pai_to_pvh(ppn_to_pai(ppn));
+       pv_rooted_entry_t       pv_e = pv_h;
+       uint32_t        bitdex;
+       pmap_t pvpmap = pv_h->pmap;
+       vm_map_offset_t pvva = pv_h->va;
+       boolean_t ppcd = FALSE;
+
+       /* Ideally, we'd consult the Mach VM here to definitively determine
+        * the nature of the mapping for this address space and address.
+        * As that would be a layering violation in this context, we
+        * use various heuristics to recover from single bit errors,
+        * malformed pagetable entries etc. These are not intended
+        * to be comprehensive.
+        */
+
+       /* As a precautionary measure, mark A+D */
+       pmap_phys_attributes[ppn_to_pai(ppn)] |= (PHYS_MODIFIED | PHYS_REFERENCED);
+
+       /*
+        * Correct potential single bit errors in either (but not both) element
+        * of the PV
+        */
+       do {
+               if ((popcnt1((uintptr_t)pv_e->pmap ^ (uintptr_t)pmap) && pv_e->va == vaddr) ||
+                   (pv_e->pmap == pmap && popcnt1(pv_e->va ^ vaddr))) {
+                       pv_e->pmap = pmap;
+                       pv_e->va = vaddr;
+                       suppress_reason = PV_BITFLIP;
+                       action = PMAP_ACTION_RETRY;
+                       goto pmap_cpc_exit;
+               }
+       } while((pv_e = (pv_rooted_entry_t) queue_next(&pv_e->qlink)) != pv_h);
+
+       /* Discover root entries with a Hamming
+        * distance of 1 from the supplied
+        * physical page frame.
+        */
+       for (bitdex = 0; bitdex < (sizeof(ppnum_t) << 3); bitdex++) {
+               ppnum_t npn = cpn ^ (ppnum_t) (1ULL << bitdex);
+               if (IS_MANAGED_PAGE(npn)) {
+                       pv_rooted_entry_t npv_h = pai_to_pvh(ppn_to_pai(npn));
+                       if (npv_h->va == vaddr && npv_h->pmap == pmap) {
+                               suppress_reason = PTE_BITFLIP;
+                               suppress_ppn = npn;
+                               action = PMAP_ACTION_RETRY_RELOCK;
+                               UNLOCK_PVH(ppn_to_pai(ppn));
+                               *ppnp = npn;
+                               goto pmap_cpc_exit;
+                       }
+               }
+       }
+
+       if (pmap == kernel_pmap) {
+               action = PMAP_ACTION_ASSERT;
+               goto pmap_cpc_exit;
+       }
+
+       /* Check for malformed/inconsistent entries */
+
+       if ((cpte & (INTEL_PTE_NCACHE | INTEL_PTE_WTHRU | INTEL_PTE_PTA)) ==  (INTEL_PTE_NCACHE | INTEL_PTE_WTHRU)) {
+               action = PMAP_ACTION_IGNORE;
+               suppress_reason = PTE_INVALID_CACHEABILITY;
+       }
+       else if (cpte & INTEL_PTE_RSVD) {
+               action = PMAP_ACTION_IGNORE;
+               suppress_reason = PTE_RSVD;
+       }
+       else if ((pmap != kernel_pmap) && ((cpte & INTEL_PTE_USER) == 0)) {
+               action = PMAP_ACTION_IGNORE;
+               suppress_reason = PTE_SUPERVISOR;
+       }
+pmap_cpc_exit:
+       PE_parse_boot_argn("-pmap_pagetable_corruption_deassert", &ppcd, sizeof(ppcd));
+
+       if (debug_boot_arg && !ppcd) {
+               action = PMAP_ACTION_ASSERT;
+       }
+
+       if ((mach_absolute_time() - pmap_pagetable_corruption_last_abstime) < pmap_pagetable_corruption_interval_abstime) {
+               action = PMAP_ACTION_ASSERT;
+               pmap_pagetable_corruption_timeout = TRUE;
+       }
+       else
+       {
+               pmap_pagetable_corruption_last_abstime = mach_absolute_time();
+       }
+       pmap_pagetable_corruption_log(incident, suppress_reason, action, pmap, vaddr, &cpte, *ppnp, pvpmap, pvva);
+       return action;
+}
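
The bitdex loop above probes every single-bit perturbation of the page frame
number: with a 32-bit ppnum_t and cpn == 0x1000 it tests 0x1001, 0x1002,
0x1004, ... up through 0x80001000, accepting a candidate frame only if its pv
root records the same (pmap, vaddr) pair, i.e. the PTE's stored frame number
plausibly suffered the bit flip rather than the mapping itself.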
+/*
+ * Remove pv list entry.
+ * Called with pv_head_table entry locked.
+ * Returns pv entry to be freed (or NULL).
+ */
+
+static inline __attribute__((always_inline)) pv_hashed_entry_t
+pmap_pv_remove( pmap_t         pmap,
+               vm_map_offset_t vaddr,
+               ppnum_t         *ppnp,
+               pt_entry_t      *pte) 
+{
+       pv_hashed_entry_t       pvh_e;
+       pv_rooted_entry_t       pv_h;
+       pv_hashed_entry_t       *pprevh;
+       int                     pvhash_idx;
+       uint32_t                pv_cnt;
+       ppnum_t                 ppn;
+
+pmap_pv_remove_retry:
+       ppn = *ppnp;
+       pvh_e = PV_HASHED_ENTRY_NULL;
+       pv_h = pai_to_pvh(ppn_to_pai(ppn));
+
+       if (pv_h->pmap == PMAP_NULL) {
+               pmap_pagetable_corruption_action_t pac = pmap_classify_pagetable_corruption(pmap, vaddr, ppnp, pte, ROOT_ABSENT);
+               if (pac == PMAP_ACTION_IGNORE)
+                       goto pmap_pv_remove_exit;
+               else if (pac == PMAP_ACTION_ASSERT)
+                       panic("pmap_pv_remove(%p,0x%llx,0x%x, 0x%llx): null pv_list!", pmap, vaddr, ppn, *pte);
+               else if (pac == PMAP_ACTION_RETRY_RELOCK) {
+                       LOCK_PVH(ppn_to_pai(*ppnp));
+                       pmap_phys_attributes[ppn_to_pai(*ppnp)] |= (PHYS_MODIFIED | PHYS_REFERENCED);
+                       goto pmap_pv_remove_retry;
+               }
+               else if (pac == PMAP_ACTION_RETRY)
+                       goto pmap_pv_remove_retry;
+       }
+
+       if (pv_h->va == vaddr && pv_h->pmap == pmap) {
+               /*
+                * Header is the pv_rooted_entry.
+                * We can't free that. If there is a queued
+                * entry after this one we remove that
+                * from the ppn queue, we remove it from the hash chain
+                * and copy it to the rooted entry. Then free it instead.
+                */
+               pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);
+               if (pv_h != (pv_rooted_entry_t) pvh_e) {
+                       /*
+                        * Entry queued to root, remove this from hash
+                        * and install as new root.
+                        */
+                       CHK_NPVHASH();
+                       pvhash_idx = pvhashidx(pvh_e->pmap, pvh_e->va);
+                       LOCK_PV_HASH(pvhash_idx);
+                       remque(&pvh_e->qlink);
+                       pprevh = pvhash(pvhash_idx);
+                       if (PV_HASHED_ENTRY_NULL == *pprevh) {
+                               panic("pmap_pv_remove(%p,0x%llx,0x%x): "
+                                     "empty hash, removing rooted",
+                                     pmap, vaddr, ppn);
+                       }
+                       pmap_pvh_unlink(pvh_e);
+                       UNLOCK_PV_HASH(pvhash_idx);
+                       pv_h->pmap = pvh_e->pmap;
+                       pv_h->va = pvh_e->va;   /* dispose of pvh_e */
+               } else {
+                       /* none queued after rooted */
+                       pv_h->pmap = PMAP_NULL;
+                       pvh_e = PV_HASHED_ENTRY_NULL;
+               }
+       } else {
+               /*
+                * not removing rooted pv. find it on hash chain, remove from
+                * ppn queue and hash chain and free it
+                */
+               CHK_NPVHASH();
+               pvhash_idx = pvhashidx(pmap, vaddr);
+               LOCK_PV_HASH(pvhash_idx);
+               pprevh = pvhash(pvhash_idx);
+               if (PV_HASHED_ENTRY_NULL == *pprevh) {
+                       panic("pmap_pv_remove(%p,0x%llx,0x%x): empty hash", pmap, vaddr, ppn);
+               }
+               pvh_e = *pprevh;
+               pmap_pv_hashlist_walks++;
+               pv_cnt = 0;
+               while (PV_HASHED_ENTRY_NULL != pvh_e) {
+                       pv_cnt++;
+                       if (pvh_e->pmap == pmap &&
+                           pvh_e->va == vaddr &&
+                           pvh_e->ppn == ppn)
+                               break;
+                       pprevh = &pvh_e->nexth;
+                       pvh_e = pvh_e->nexth;
+               }
+               if (PV_HASHED_ENTRY_NULL == pvh_e) {
+                       pmap_pagetable_corruption_action_t pac = pmap_classify_pagetable_corruption(pmap, vaddr, ppnp, pte, ROOT_PRESENT);
+
+                       if (pac == PMAP_ACTION_ASSERT)
+                               panic("pmap_pv_remove(%p,0x%llx,0x%x, 0x%llx): pv not on hash, head: %p, 0x%llx", pmap, vaddr, ppn, *pte, pv_h->pmap, pv_h->va);
+                       else {
+                               UNLOCK_PV_HASH(pvhash_idx);
+                               if (pac == PMAP_ACTION_RETRY_RELOCK) {
+                                       LOCK_PVH(ppn_to_pai(*ppnp));
+                                       pmap_phys_attributes[ppn_to_pai(*ppnp)] |= (PHYS_MODIFIED | PHYS_REFERENCED);
+                                       goto pmap_pv_remove_retry;
+                               }
+                               else if (pac == PMAP_ACTION_RETRY) {
+                                       goto pmap_pv_remove_retry;
+                               }
+                               else if (pac == PMAP_ACTION_IGNORE) {
+                                       goto pmap_pv_remove_exit;
+                               }
+                       }
+               }
+               pmap_pv_hashlist_cnts += pv_cnt;
+               if (pmap_pv_hashlist_max < pv_cnt)
+                       pmap_pv_hashlist_max = pv_cnt;
+               *pprevh = pvh_e->nexth;
+               remque(&pvh_e->qlink);
+               UNLOCK_PV_HASH(pvhash_idx);
+       }
+pmap_pv_remove_exit:
+       return pvh_e;
+}
+
 #endif /* MACH_KERNEL_PRIVATE */
index de9b75835b4a259b92ee871f94a29bb71b526660..53c1996e1eb227a6ec5e11305eb93cfd5c92bdbc 100644 (file)
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <i386/pmap_internal.h>
+
+
+void           pmap_remove_range(
+                       pmap_t          pmap,
+                       vm_map_offset_t va,
+                       pt_entry_t      *spte,
+                       pt_entry_t      *epte);
+
+pv_rooted_entry_t      pv_head_table;          /* array of entries, one per
+                                                * page */
+thread_call_t          mapping_adjust_call;
+static thread_call_data_t mapping_adjust_call_data;
+uint32_t               mappingrecurse = 0;
+
+pmap_pagetable_corruption_record_t pmap_pagetable_corruption_records[PMAP_PAGETABLE_CORRUPTION_MAX_LOG];
+uint32_t pmap_pagetable_corruption_incidents;
+uint64_t pmap_pagetable_corruption_last_abstime = (~(0ULL) >> 1);
+uint64_t pmap_pagetable_corruption_interval_abstime;
+thread_call_t  pmap_pagetable_corruption_log_call;
+static thread_call_data_t      pmap_pagetable_corruption_log_call_data;
+boolean_t pmap_pagetable_corruption_timeout = FALSE;
+
 /*
  * The Intel platform can nest at the PDE level, so NBPDE (i.e. 2MB) at a time,
  * on a NBPDE boundary.
@@ -315,3 +337,942 @@ pfp_exit:
         return ppn;
 }
 
+/*
+ *     Insert the given physical page (p) at
+ *     the specified virtual address (v) in the
+ *     target physical map with the protection requested.
+ *
+ *     If specified, the page will be wired down, meaning
+ *     that the related pte cannot be reclaimed.
+ *
+ *     NB:  This is the only routine which MAY NOT lazy-evaluate
+ *     or lose information.  That is, this routine must actually
+ *     insert this page into the given map NOW.
+ */
+void
+pmap_enter(
+       register pmap_t         pmap,
+       vm_map_offset_t         vaddr,
+       ppnum_t                 pn,
+       vm_prot_t               prot,
+       unsigned int            flags,
+       boolean_t               wired)
+{
+       pt_entry_t              *pte;
+       pv_rooted_entry_t       pv_h;
+       int                     pai;
+       pv_hashed_entry_t       pvh_e;
+       pv_hashed_entry_t       pvh_new;
+       pt_entry_t              template;
+       pmap_paddr_t            old_pa;
+       pmap_paddr_t            pa = (pmap_paddr_t) i386_ptob(pn);
+       boolean_t               need_tlbflush = FALSE;
+       boolean_t               set_NX;
+       char                    oattr;
+       boolean_t               old_pa_locked;
+       /* 2MiB mappings are confined to x86_64 by VM */
+       boolean_t               superpage = flags & VM_MEM_SUPERPAGE;
+       vm_object_t             delpage_pm_obj = NULL;
+       int                     delpage_pde_index = 0;
+       pt_entry_t              old_pte;
+
+       pmap_intr_assert();
+       assert(pn != vm_page_fictitious_addr);
+
+       if (pmap == PMAP_NULL)
+               return;
+       if (pn == vm_page_guard_addr)
+               return;
+
+       PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
+                  pmap,
+                  (uint32_t) (vaddr >> 32), (uint32_t) vaddr,
+                  pn, prot);
+
+       if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
+               set_NX = FALSE;
+       else
+               set_NX = TRUE;
+
+       /*
+        *      Must allocate a new pvlist entry while we're unlocked;
+        *      zalloc may cause pageout (which will lock the pmap system).
+        *      If we determine we need a pvlist entry, we will unlock
+        *      and allocate one.  Then we will retry, throwing away
+        *      the allocated entry later (if we no longer need it).
+        */
+
+       pvh_new = PV_HASHED_ENTRY_NULL;
+Retry:
+       pvh_e = PV_HASHED_ENTRY_NULL;
+
+       PMAP_LOCK(pmap);
+
+       /*
+        *      Expand pmap to include this pte.  Assume that
+        *      pmap is always expanded to include enough hardware
+        *      pages to map one VM page.
+        */
+        if(superpage) {
+               while ((pte = pmap64_pde(pmap, vaddr)) == PD_ENTRY_NULL) {
+                       /* need room for another pde entry */
+                       PMAP_UNLOCK(pmap);
+                       pmap_expand_pdpt(pmap, vaddr);
+                       PMAP_LOCK(pmap);
+               }
+       } else {
+               while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) {
+                       /*
+                        * Must unlock to expand the pmap:
+                        * we are going to grow pde-level page(s).
+                        */
+                       PMAP_UNLOCK(pmap);
+                       pmap_expand(pmap, vaddr);
+                       PMAP_LOCK(pmap);
+               }
+       }
+
+       if (superpage && *pte && !(*pte & INTEL_PTE_PS)) {
+               /*
+                * There is still an empty page table mapped that
+                * was used for a previous base page mapping.
+                * Remember the PDE and the PDE index, so that we
+                * can free the page at the end of this function.
+                */
+               delpage_pde_index = (int)pdeidx(pmap, vaddr);
+               delpage_pm_obj = pmap->pm_obj;
+               *pte = 0;
+       }
+
+
+       old_pa = pte_to_pa(*pte);
+       pai = pa_index(old_pa);
+       old_pa_locked = FALSE;
+
+       /*
+        * if we have a previous managed page, lock the pv entry now. after
+        * we lock it, check to see if someone beat us to the lock and if so
+        * drop the lock
+        */
+       if ((0 != old_pa) && IS_MANAGED_PAGE(pai)) {
+               LOCK_PVH(pai);
+               old_pa_locked = TRUE;
+               old_pa = pte_to_pa(*pte);
+               if (0 == old_pa) {
+                       UNLOCK_PVH(pai);        /* another path beat us to it */
+                       old_pa_locked = FALSE;
+               }
+       }
+
+       /*
+        *      Special case if the incoming physical page is already mapped
+        *      at this address.
+        */
+       if (old_pa == pa) {
+
+               /*
+                *      May be changing its wired attribute or protection
+                */
+
+               template = pa_to_pte(pa) | INTEL_PTE_VALID;
+
+               if (VM_MEM_NOT_CACHEABLE ==
+                   (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT))) {
+                       if (!(flags & VM_MEM_GUARDED))
+                               template |= INTEL_PTE_PTA;
+                       template |= INTEL_PTE_NCACHE;
+               }
+               if (pmap != kernel_pmap)
+                       template |= INTEL_PTE_USER;
+               if (prot & VM_PROT_WRITE)
+                       template |= INTEL_PTE_WRITE;
+
+               if (set_NX)
+                       template |= INTEL_PTE_NX;
+
+               if (wired) {
+                       template |= INTEL_PTE_WIRED;
+                       if (!iswired(*pte))
+                               OSAddAtomic(+1,
+                                       &pmap->stats.wired_count);
+               } else {
+                       if (iswired(*pte)) {
+                               assert(pmap->stats.wired_count >= 1);
+                               OSAddAtomic(-1,
+                                       &pmap->stats.wired_count);
+                       }
+               }
+               if (superpage)          /* this path can not be used */
+                       template |= INTEL_PTE_PS;       /* to change the page size! */
+
+               /* store modified PTE and preserve RC bits */
+               pmap_update_pte(pte, *pte,
+                       template | (*pte & (INTEL_PTE_REF | INTEL_PTE_MOD)));
+               if (old_pa_locked) {
+                       UNLOCK_PVH(pai);
+                       old_pa_locked = FALSE;
+               }
+               need_tlbflush = TRUE;
+               goto Done;
+       }
+
+       /*
+        *      Outline of code from here:
+        *         1) If va was mapped, update TLBs, remove the mapping
+        *            and remove old pvlist entry.
+        *         2) Add pvlist entry for new mapping
+        *         3) Enter new mapping.
+        *
+        *      If the old physical page is not managed step 1) is skipped
+        *      (except for updating the TLBs), and the mapping is
+        *      overwritten at step 3).  If the new physical page is not
+        *      managed, step 2) is skipped.
+        */
+
+       if (old_pa != (pmap_paddr_t) 0) {
+
+               /*
+                *      Don't do anything to pages outside valid memory here.
+                *      Instead convince the code that enters a new mapping
+                *      to overwrite the old one.
+                */
+
+               /* invalidate the PTE */
+               pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_VALID));
+               /* propagate invalidate everywhere */
+               PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
+               /* remember reference and change */
+               old_pte = *pte;
+               oattr = (char) (old_pte & (PHYS_MODIFIED | PHYS_REFERENCED));
+               /* completely invalidate the PTE */
+               pmap_store_pte(pte, 0);
+
+               if (IS_MANAGED_PAGE(pai)) {
+#if TESTING
+                       if (pmap->stats.resident_count < 1)
+                               panic("pmap_enter: resident_count");
+#endif
+                       assert(pmap->stats.resident_count >= 1);
+                       OSAddAtomic(-1,
+                               &pmap->stats.resident_count);
+
+                       if (iswired(old_pte)) {
+#if TESTING
+                               if (pmap->stats.wired_count < 1)
+                                       panic("pmap_enter: wired_count");
+#endif
+                               assert(pmap->stats.wired_count >= 1);
+                               OSAddAtomic(-1,
+                                       &pmap->stats.wired_count);
+                       }
+                       pmap_phys_attributes[pai] |= oattr;
+
+                       /*
+                        *      Remove the mapping from the pvlist for
+                        *      this physical page.
+                        *      We'll end up with either a rooted pv or a
+                        *      hashed pv
+                        */
+                       pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, &old_pte);
+
+               } else {
+
+                       /*
+                        *      old_pa is not managed.
+                        *      Do removal part of accounting.
+                        */
+
+                       if (iswired(old_pte)) {
+                               assert(pmap->stats.wired_count >= 1);
+                               OSAddAtomic(-1,
+                                       &pmap->stats.wired_count);
+                       }
+               }
+       }
+
+       /*
+        * if we had a previously managed page locked, unlock it now
+        */
+       if (old_pa_locked) {
+               UNLOCK_PVH(pai);
+               old_pa_locked = FALSE;
+       }
+
+       pai = pa_index(pa);     /* now working with new incoming phys page */
+       if (IS_MANAGED_PAGE(pai)) {
+
+               /*
+                *      Step 2) Enter the mapping in the PV list for this
+                *      physical page.
+                */
+               pv_h = pai_to_pvh(pai);
+
+               LOCK_PVH(pai);
+
+               if (pv_h->pmap == PMAP_NULL) {
+                       /*
+                        *      No mappings yet, use rooted pv
+                        */
+                       pv_h->va = vaddr;
+                       pv_h->pmap = pmap;
+                       queue_init(&pv_h->qlink);
+               } else {
+                       /*
+                        *      Add new pv_hashed_entry after header.
+                        */
+                       if ((PV_HASHED_ENTRY_NULL == pvh_e) && pvh_new) {
+                               pvh_e = pvh_new;
+                               pvh_new = PV_HASHED_ENTRY_NULL;
+                       } else if (PV_HASHED_ENTRY_NULL == pvh_e) {
+                               PV_HASHED_ALLOC(pvh_e);
+                               if (PV_HASHED_ENTRY_NULL == pvh_e) {
+                                       /*
+                                        * the pv list is empty. if we are on
+                                        * the kernel pmap we'll use one of
+                                        * the special private kernel pv_e's,
+                                        * else, we need to unlock
+                                        * everything, zalloc a pv_e, and
+                                        * restart bringing in the pv_e with
+                                        * us.
+                                        */
+                                       if (kernel_pmap == pmap) {
+                                               PV_HASHED_KERN_ALLOC(pvh_e);
+                                       } else {
+                                               UNLOCK_PVH(pai);
+                                               PMAP_UNLOCK(pmap);
+                                               pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
+                                               goto Retry;
+                                       }
+                               }
+                       }
+                       
+                       if (PV_HASHED_ENTRY_NULL == pvh_e)
+                               panic("Mapping alias chain exhaustion, possibly induced by numerous kernel virtual double mappings");
+
+                       pvh_e->va = vaddr;
+                       pvh_e->pmap = pmap;
+                       pvh_e->ppn = pn;
+                       pv_hash_add(pvh_e, pv_h);
+
+                       /*
+                        *      Remember that we used the pvlist entry.
+                        */
+                       pvh_e = PV_HASHED_ENTRY_NULL;
+               }
+
+               /*
+                * only count the mapping
+                * for 'managed memory'
+                */
+               OSAddAtomic(+1,  & pmap->stats.resident_count);
+               if (pmap->stats.resident_count > pmap->stats.resident_max) {
+                       pmap->stats.resident_max = pmap->stats.resident_count;
+               }
+       }
+       /*
+        * Step 3) Enter the mapping.
+        *
+        *      Build a template to speed up entering -
+        *      only the pfn changes.
+        */
+       template = pa_to_pte(pa) | INTEL_PTE_VALID;
+
+       if (flags & VM_MEM_NOT_CACHEABLE) {
+               if (!(flags & VM_MEM_GUARDED))
+                       template |= INTEL_PTE_PTA;
+               template |= INTEL_PTE_NCACHE;
+       }
+       if (pmap != kernel_pmap)
+               template |= INTEL_PTE_USER;
+       if (prot & VM_PROT_WRITE)
+               template |= INTEL_PTE_WRITE;
+       if (set_NX)
+               template |= INTEL_PTE_NX;
+       if (wired) {
+               template |= INTEL_PTE_WIRED;
+               OSAddAtomic(+1,  & pmap->stats.wired_count);
+       }
+       if (superpage)
+               template |= INTEL_PTE_PS;
+       pmap_store_pte(pte, template);
+
+       /*
+        * if this was a managed page we delayed unlocking the pv until here
+        * to prevent pmap_page_protect et al from finding it until the pte
+        * has been stored
+        */
+       if (IS_MANAGED_PAGE(pai)) {
+               UNLOCK_PVH(pai);
+       }
+Done:
+       if (need_tlbflush == TRUE)
+               PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
+
+       if (pvh_e != PV_HASHED_ENTRY_NULL) {
+               PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1);
+       }
+       if (pvh_new != PV_HASHED_ENTRY_NULL) {
+               PV_HASHED_KERN_FREE_LIST(pvh_new, pvh_new, 1);
+       }
+       PMAP_UNLOCK(pmap);
+
+       if (delpage_pm_obj) {
+               vm_page_t m;
+
+               vm_object_lock(delpage_pm_obj);
+               m = vm_page_lookup(delpage_pm_obj, delpage_pde_index);
+               if (m == VM_PAGE_NULL)
+                   panic("pmap_enter: pte page not in object");
+               VM_PAGE_FREE(m);
+               OSAddAtomic(-1,  &inuse_ptepages_count);
+               vm_object_unlock(delpage_pm_obj);
+       }
+
+       PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, 0, 0, 0, 0, 0);
+}
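
For orientation, a hedged caller-side example (illustrative names and values
only, not taken from this change):

	/* sketch: establish one wired, read/write kernel mapping */
	pmap_enter(kernel_pmap, vaddr, pn,
		   VM_PROT_READ | VM_PROT_WRITE,
		   0,		/* flags: default cacheability */
		   TRUE);	/* wired: the PTE may not be reclaimed */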
+
+/*
+ *     Remove a range of hardware page-table entries.
+ *     The entries given are the first (inclusive)
+ *     and last (exclusive) entries for the VM pages.
+ *     The virtual address is the va for the first pte.
+ *
+ *     The pmap must be locked.
+ *     If the pmap is not the kernel pmap, the range must lie
+ *     entirely within one pte-page.  This is NOT checked.
+ *     Assumes that the pte-page exists.
+ */
+
+void
+pmap_remove_range(
+       pmap_t                  pmap,
+       vm_map_offset_t         start_vaddr,
+       pt_entry_t              *spte,
+       pt_entry_t              *epte)
+{
+       pt_entry_t              *cpte;
+       pv_hashed_entry_t       pvh_et = PV_HASHED_ENTRY_NULL;
+       pv_hashed_entry_t       pvh_eh = PV_HASHED_ENTRY_NULL;
+       pv_hashed_entry_t       pvh_e;
+       int                     pvh_cnt = 0;
+       int                     num_removed, num_unwired, num_found, num_invalid;
+       int                     pai;
+       pmap_paddr_t            pa;
+       vm_map_offset_t         vaddr;
+
+       num_removed = 0;
+       num_unwired = 0;
+       num_found   = 0;
+       num_invalid = 0;
+#if    defined(__i386__)
+       if (pmap != kernel_pmap &&
+           pmap->pm_task_map == TASK_MAP_32BIT &&
+           start_vaddr >= HIGH_MEM_BASE) {
+               /*
+                * The range is in the "high_shared_pde" which is shared
+                * between the kernel and all 32-bit tasks.  It holds
+                * the 32-bit commpage but also the trampolines, GDT, etc...
+                * so we can't let user tasks remove anything from it.
+                */
+               return;
+       }
+#endif
+       /* invalidate the PTEs first to "freeze" them */
+       for (cpte = spte, vaddr = start_vaddr;
+            cpte < epte;
+            cpte++, vaddr += PAGE_SIZE_64) {
+               pt_entry_t p = *cpte;
+
+               pa = pte_to_pa(p);
+               if (pa == 0)
+                       continue;
+               num_found++;
+
+               if (iswired(p))
+                       num_unwired++;
+               
+               pai = pa_index(pa);
+
+               if (!IS_MANAGED_PAGE(pai)) {
+                       /*
+                        *      Outside range of managed physical memory.
+                        *      Just remove the mappings.
+                        */
+                       pmap_store_pte(cpte, 0);
+                       continue;
+               }
+
+               if ((p & INTEL_PTE_VALID) == 0)
+                       num_invalid++;
+
+               /* invalidate the PTE */ 
+               pmap_update_pte(cpte, *cpte, (*cpte & ~INTEL_PTE_VALID));
+       }
+
+       if (num_found == 0) {
+               /* nothing was changed: we're done */
+               goto update_counts;
+       }
+
+       /* propagate the invalidates to other CPUs */
+
+       PMAP_UPDATE_TLBS(pmap, start_vaddr, vaddr);
+
+       for (cpte = spte, vaddr = start_vaddr;
+            cpte < epte;
+            cpte++, vaddr += PAGE_SIZE_64) {
+
+               pa = pte_to_pa(*cpte);
+               if (pa == 0)
+                       continue;
+
+               pai = pa_index(pa);
+
+               LOCK_PVH(pai);
+
+               pa = pte_to_pa(*cpte);
+               if (pa == 0) {
+                       UNLOCK_PVH(pai);
+                       continue;
+               }
+               num_removed++;
+
+               /*
+                * Get the modify and reference bits, then
+                * nuke the entry in the page table
+                */
+               /* remember reference and change */
+               pmap_phys_attributes[pai] |=
+                       (char) (*cpte & (PHYS_MODIFIED | PHYS_REFERENCED));
+
+               /*
+                * Remove the mapping from the pvlist for this physical page.
+                */
+               pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, cpte);
+
+               /* completely invalidate the PTE */
+               pmap_store_pte(cpte, 0);
+
+               UNLOCK_PVH(pai);
+
+               if (pvh_e != PV_HASHED_ENTRY_NULL) {
+                       pvh_e->qlink.next = (queue_entry_t) pvh_eh;
+                       pvh_eh = pvh_e;
+
+                       if (pvh_et == PV_HASHED_ENTRY_NULL) {
+                               pvh_et = pvh_e;
+                       }
+                       pvh_cnt++;
+               }
+       } /* for loop */
+
+       if (pvh_eh != PV_HASHED_ENTRY_NULL) {
+               PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
+       }
+update_counts:
+       /*
+        *      Update the counts
+        */
+#if TESTING
+       if (pmap->stats.resident_count < num_removed)
+               panic("pmap_remove_range: resident_count");
+#endif
+       assert(pmap->stats.resident_count >= num_removed);
+       OSAddAtomic(-num_removed,  &pmap->stats.resident_count);
+
+#if TESTING
+       if (pmap->stats.wired_count < num_unwired)
+               panic("pmap_remove_range: wired_count");
+#endif
+       assert(pmap->stats.wired_count >= num_unwired);
+       OSAddAtomic(-num_unwired,  &pmap->stats.wired_count);
+
+       return;
+}
+
+
+/*
+ *     Remove the given range of addresses
+ *     from the specified map.
+ *
+ *     It is assumed that the start and end are properly
+ *     rounded to the hardware page size.
+ */
+void
+pmap_remove(
+       pmap_t          map,
+       addr64_t        s64,
+       addr64_t        e64)
+{
+       pt_entry_t     *pde;
+       pt_entry_t     *spte, *epte;
+       addr64_t        l64;
+       uint64_t        deadline;
+
+       pmap_intr_assert();
+
+       if (map == PMAP_NULL || s64 == e64)
+               return;
+
+       PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
+                  map,
+                  (uint32_t) (s64 >> 32), s64,
+                  (uint32_t) (e64 >> 32), e64);
+
+
+       PMAP_LOCK(map);
+
+#if 0
+       /*
+        * Check that address range in the kernel does not overlap the stacks.
+        * We initialize local static min/max variables once to avoid making
+        * 2 function calls for every remove. Note also that these functions
+        * both return 0 before kernel stacks have been initialized, and hence
+        * the panic is not triggered in this case.
+        */
+       if (map == kernel_pmap) {
+               static vm_offset_t kernel_stack_min = 0;
+               static vm_offset_t kernel_stack_max = 0;
+
+               if (kernel_stack_min == 0) {
+                       kernel_stack_min = min_valid_stack_address();
+                       kernel_stack_max = max_valid_stack_address();
+               }
+               if ((kernel_stack_min <= s64 && s64 < kernel_stack_max) ||
+                   (kernel_stack_min < e64 && e64 <= kernel_stack_max))
+                       panic("pmap_remove() attempted in kernel stack");
+       }
+#else
+
+       /*
+        * The values of kernel_stack_min and kernel_stack_max are no longer
+        * relevant now that we allocate kernel stacks in the kernel map,
+        * so the old code above no longer applies.  If we wanted to check that
+        * we weren't removing a mapping of a page in a kernel stack we'd 
+        * mark the PTE with an unused bit and check that here.
+        */
+
+#endif
+
+       deadline = rdtsc64() + max_preemption_latency_tsc;
+
+       while (s64 < e64) {
+               l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
+               if (l64 > e64)
+                       l64 = e64;
+               pde = pmap_pde(map, s64);
+
+               if (pde && (*pde & INTEL_PTE_VALID)) {
+                       if (*pde & INTEL_PTE_PS) {
+                               /*
+                                * If we're removing a superpage, pmap_remove_range()
+                                * must work on level 2 instead of level 1; and we're
+                                * only passing a single level 2 entry instead of a
+                                * level 1 range.
+                                */
+                               spte = pde;
+                               epte = spte+1; /* excluded */
+                       } else {
+                               spte = pmap_pte(map, (s64 & ~(pde_mapped_size - 1)));
+                               spte = &spte[ptenum(s64)];
+                               epte = &spte[intel_btop(l64 - s64)];
+                       }
+                       pmap_remove_range(map, s64, spte, epte);
+               }
+               s64 = l64;
+
+               if (s64 < e64 && rdtsc64() >= deadline) {
+                       PMAP_UNLOCK(map)
+                       PMAP_LOCK(map)
+                       deadline = rdtsc64() + max_preemption_latency_tsc;
+               }
+       }
+
+       PMAP_UNLOCK(map);
+
+       PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END,
+                  map, 0, 0, 0, 0);
+
+}
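
The chunking arithmetic above advances at most one PDE's worth of address
space per iteration: l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1)
rounds s64 up to the next PDE boundary, so with 2 MB PDEs an s64 of 0x1ff000
yields l64 == 0x200000 and the spte..epte range never crosses a pte-page.
The rdtsc64() deadline then bounds how long the pmap lock is held before it
is dropped and retaken, honoring MAX_PREEMPTION_LATENCY_NS.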
+
+/*
+ *     Routine:        pmap_page_protect
+ *
+ *     Function:
+ *             Lower the permission for all mappings to a given
+ *             page.
+ */
+void
+pmap_page_protect(
+        ppnum_t         pn,
+       vm_prot_t       prot)
+{
+       pv_hashed_entry_t       pvh_eh = PV_HASHED_ENTRY_NULL;
+       pv_hashed_entry_t       pvh_et = PV_HASHED_ENTRY_NULL;
+       pv_hashed_entry_t       nexth;
+       int                     pvh_cnt = 0;
+       pv_rooted_entry_t       pv_h;
+       pv_rooted_entry_t       pv_e;
+       pv_hashed_entry_t       pvh_e;
+       pt_entry_t              *pte;
+       int                     pai;
+       pmap_t                  pmap;
+       boolean_t               remove;
+
+       pmap_intr_assert();
+       assert(pn != vm_page_fictitious_addr);
+       if (pn == vm_page_guard_addr)
+               return;
+
+       pai = ppn_to_pai(pn);
+
+       if (!IS_MANAGED_PAGE(pai)) {
+               /*
+                *      Not a managed page.
+                */
+               return;
+       }
+       PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START,
+                  pn, prot, 0, 0, 0);
+
+       /*
+        * Determine the new protection.
+        */
+       switch (prot) {
+       case VM_PROT_READ:
+       case VM_PROT_READ | VM_PROT_EXECUTE:
+               remove = FALSE;
+               break;
+       case VM_PROT_ALL:
+               return;         /* nothing to do */
+       default:
+               remove = TRUE;
+               break;
+       }
+
+       pv_h = pai_to_pvh(pai);
+
+       LOCK_PVH(pai);
+
+
+       /*
+        * Walk down PV list, if any, changing or removing all mappings.
+        */
+       if (pv_h->pmap == PMAP_NULL)
+               goto done;
+
+       pv_e = pv_h;
+       pvh_e = (pv_hashed_entry_t) pv_e;       /* cheat */
+
+       do {
+               vm_map_offset_t vaddr;
+
+               pmap = pv_e->pmap;
+               vaddr = pv_e->va;
+               pte = pmap_pte(pmap, vaddr);
+
+               if (0 == pte) {
+                       panic("pmap_page_protect() "
+                               "pmap=%p pn=0x%x vaddr=0x%llx\n",
+                               pmap, pn, vaddr);
+               }
+#if    DEBUG
+               if (pa_index(pte_to_pa(*pte)) != pn)
+                       panic("pmap_page_protect: PTE mismatch, pn: 0x%x, pmap: %p, vaddr: 0x%llx, pte: 0x%llx", pn, pmap, vaddr, *pte);
+#endif
+               nexth = (pv_hashed_entry_t) queue_next(&pvh_e->qlink);
+
+               /*
+                * Remove the mapping if new protection is NONE
+                * or if write-protecting a kernel mapping.
+                */
+               if (remove || pmap == kernel_pmap) {
+                       /*
+                        * Remove the mapping, collecting dirty bits.
+                        */
+                       pmap_update_pte(pte, *pte, *pte & ~INTEL_PTE_VALID);
+                       PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
+                       pmap_phys_attributes[pai] |=
+                               *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
+                       pmap_store_pte(pte, 0);
+
+#if TESTING
+                       if (pmap->stats.resident_count < 1)
+                               panic("pmap_page_protect: resident_count");
+#endif
+                       assert(pmap->stats.resident_count >= 1);
+                       OSAddAtomic(-1,  &pmap->stats.resident_count);
+
+                       /*
+                        * Deal with the pv_rooted_entry.
+                        */
+
+                       if (pv_e == pv_h) {
+                               /*
+                                * Fix up head later.
+                                */
+                               pv_h->pmap = PMAP_NULL;
+                       } else {
+                               /*
+                                * Delete this entry.
+                                */
+                               pv_hash_remove(pvh_e);
+                               pvh_e->qlink.next = (queue_entry_t) pvh_eh;
+                               pvh_eh = pvh_e;
+
+                               if (pvh_et == PV_HASHED_ENTRY_NULL)
+                                       pvh_et = pvh_e;
+                               pvh_cnt++;
+                       }
+               } else {
+                       /*
+                        * Write-protect.
+                        */
+                       pmap_update_pte(pte, *pte, *pte & ~INTEL_PTE_WRITE);
+                       PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
+               }
+               pvh_e = nexth;
+       } while ((pv_e = (pv_rooted_entry_t) nexth) != pv_h);
+
+
+       /*
+        * If pv_head mapping was removed, fix it up.
+        */
+       if (pv_h->pmap == PMAP_NULL) {
+               pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);
+
+               if (pvh_e != (pv_hashed_entry_t) pv_h) {
+                       pv_hash_remove(pvh_e);
+                       pv_h->pmap = pvh_e->pmap;
+                       pv_h->va = pvh_e->va;
+                       pvh_e->qlink.next = (queue_entry_t) pvh_eh;
+                       pvh_eh = pvh_e;
+
+                       if (pvh_et == PV_HASHED_ENTRY_NULL)
+                               pvh_et = pvh_e;
+                       pvh_cnt++;
+               }
+       }
+       if (pvh_eh != PV_HASHED_ENTRY_NULL) {
+               PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
+       }
+done:
+       UNLOCK_PVH(pai);
+
+       PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END,
+                  0, 0, 0, 0, 0);
+}
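
A usage note, following the protection switch above: VM_PROT_READ (with or
without EXECUTE) downgrades mappings in place, VM_PROT_ALL is a no-op, and
anything else removes the mappings outright. For example:

	pmap_page_protect(pn, VM_PROT_READ);	/* write-protect every mapping */
	pmap_page_protect(pn, VM_PROT_NONE);	/* remove every mapping */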
+
+__private_extern__ void
+pmap_pagetable_corruption_msg_log(int (*log_func)(const char * fmt, ...)__printflike(1,2)) {
+       if (pmap_pagetable_corruption_incidents > 0) {
+               int i, e = MIN(pmap_pagetable_corruption_incidents, PMAP_PAGETABLE_CORRUPTION_MAX_LOG);
+               (*log_func)("%u pagetable corruption incident(s) detected, timeout: %u\n", pmap_pagetable_corruption_incidents, pmap_pagetable_corruption_timeout);
+               for (i = 0; i < e; i++) {
+                       (*log_func)("Incident 0x%x, reason: 0x%x, action: 0x%x, time: 0x%llx\n", pmap_pagetable_corruption_records[i].incident,  pmap_pagetable_corruption_records[i].reason, pmap_pagetable_corruption_records[i].action, pmap_pagetable_corruption_records[i].abstime);
+               }
+       }
+}
+
+void
+mapping_free_prime(void)
+{
+       int                     i;
+       pv_hashed_entry_t       pvh_e;
+       pv_hashed_entry_t       pvh_eh;
+       pv_hashed_entry_t       pvh_et;
+       int                     pv_cnt;
+
+       pv_cnt = 0;
+       pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
+       for (i = 0; i < (5 * PV_HASHED_ALLOC_CHUNK); i++) {
+               pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
+
+               pvh_e->qlink.next = (queue_entry_t)pvh_eh;
+               pvh_eh = pvh_e;
+
+               if (pvh_et == PV_HASHED_ENTRY_NULL)
+                       pvh_et = pvh_e;
+               pv_cnt++;
+       }
+       PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
+
+       pv_cnt = 0;
+       pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
+       for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK; i++) {
+               pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
+
+               pvh_e->qlink.next = (queue_entry_t)pvh_eh;
+               pvh_eh = pvh_e;
+
+               if (pvh_et == PV_HASHED_ENTRY_NULL)
+                       pvh_et = pvh_e;
+               pv_cnt++;
+       }
+       PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
+
+}
+
+static inline void
+pmap_pagetable_corruption_log_setup(void) {
+       if (pmap_pagetable_corruption_log_call == NULL) {
+               nanotime_to_absolutetime(PMAP_PAGETABLE_CORRUPTION_INTERVAL, 0, &pmap_pagetable_corruption_interval_abstime);
+               thread_call_setup(&pmap_pagetable_corruption_log_call_data,
+                   (thread_call_func_t) pmap_pagetable_corruption_msg_log,
+                   (thread_call_param_t) &printf);
+               pmap_pagetable_corruption_log_call = &pmap_pagetable_corruption_log_call_data;
+       }
+}
+
+void
+mapping_adjust(void)
+{
+       pv_hashed_entry_t       pvh_e;
+       pv_hashed_entry_t       pvh_eh;
+       pv_hashed_entry_t       pvh_et;
+       int                     pv_cnt;
+       int                     i;
+
+       if (mapping_adjust_call == NULL) {
+               thread_call_setup(&mapping_adjust_call_data,
+                                 (thread_call_func_t) mapping_adjust,
+                                 (thread_call_param_t) NULL);
+               mapping_adjust_call = &mapping_adjust_call_data;
+       }
+
+       pmap_pagetable_corruption_log_setup();
+
+       pv_cnt = 0;
+       pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
+       if (pv_hashed_kern_free_count < PV_HASHED_KERN_LOW_WATER_MARK) {
+               for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK; i++) {
+                       pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
+
+                       pvh_e->qlink.next = (queue_entry_t)pvh_eh;
+                       pvh_eh = pvh_e;
+
+                       if (pvh_et == PV_HASHED_ENTRY_NULL)
+                               pvh_et = pvh_e;
+                       pv_cnt++;
+               }
+               PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
+       }
+
+       pv_cnt = 0;
+       pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
+       if (pv_hashed_free_count < PV_HASHED_LOW_WATER_MARK) {
+               for (i = 0; i < PV_HASHED_ALLOC_CHUNK; i++) {
+                       pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
+
+                       pvh_e->qlink.next = (queue_entry_t)pvh_eh;
+                       pvh_eh = pvh_e;
+
+                       if (pvh_et == PV_HASHED_ENTRY_NULL)
+                               pvh_et = pvh_e;
+                       pv_cnt++;
+               }
+               PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
+       }
+       mappingrecurse = 0;
+}
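
mappingrecurse is the latch that ties this routine to the allocation macros:
PV_HASHED_ALLOC and PV_HASHED_KERN_ALLOC arm it with
hw_compare_and_store(0, 1, ...) when a free list drops below its low-water
mark, so at most one mapping_adjust() thread call is queued at a time;
clearing it here re-enables the trigger.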
+
index 9adfe5b832f90296e4b60148459a118620099a51..d61a26a7705ce22352459fe48c7fb5c3f552011f 100644 (file)
@@ -595,7 +595,7 @@ ipc_kmsg_alloc(
                mach_msg_size_t max_desc = (mach_msg_size_t)(((size - sizeof(mach_msg_base_t)) /
                                           sizeof(mach_msg_ool_descriptor32_t)) *
                                           DESC_SIZE_ADJUSTMENT);
-               if (msg_and_trailer_size >= MACH_MSG_SIZE_MAX - max_desc)
+               if (msg_and_trailer_size > MACH_MSG_SIZE_MAX - max_desc)
                        return IKM_NULL;
 
                max_expanded_size = msg_and_trailer_size + max_desc;
@@ -617,12 +617,9 @@ ipc_kmsg_alloc(
                        assert(i <= IKM_STASH);
                        kmsg = cache->entries[--i];
                        cache->avail = i;
-                       ikm_check_init(kmsg, max_expanded_size);
                        enable_preemption();
-                       kmsg->ikm_header = (mach_msg_header_t *)
-                                          ((vm_offset_t)(kmsg + 1) +
-                                           max_expanded_size -
-                                           msg_and_trailer_size);
+                       ikm_check_init(kmsg, max_expanded_size);
+                       ikm_set_header(kmsg, msg_and_trailer_size);
                        return (kmsg);
                }
                enable_preemption();
@@ -633,10 +630,7 @@ ipc_kmsg_alloc(
 
        if (kmsg != IKM_NULL) {
                ikm_init(kmsg, max_expanded_size);
-               kmsg->ikm_header = (mach_msg_header_t *)
-                                  ((vm_offset_t)(kmsg + 1) +
-                                   max_expanded_size -
-                                   msg_and_trailer_size);
+               ikm_set_header(kmsg, msg_and_trailer_size);
        }
 
        return(kmsg);
@@ -1072,6 +1066,23 @@ ipc_kmsg_clear_prealloc(
        IP_CLEAR_PREALLOC(port, kmsg);
 }
 
+/*
+ *     Routine:        ipc_kmsg_prealloc
+ *     Purpose:
+ *             Wrapper for ipc_kmsg_alloc() that accounts for
+ *             header expansion requirements.
+ */
+ipc_kmsg_t
+ipc_kmsg_prealloc(mach_msg_size_t size)
+{
+#if defined(__LP64__)
+       if (size > MACH_MSG_SIZE_MAX - LEGACY_HEADER_SIZE_DELTA)
+               return IKM_NULL;
+
+       size += LEGACY_HEADER_SIZE_DELTA;
+#endif
+       return ipc_kmsg_alloc(size);
+}
 
 
 /*
@@ -1243,10 +1254,9 @@ ipc_kmsg_get_from_kernel(
         * clients.  These are set up for those kernel clients
         * which cannot afford to wait.
         */
-#ifndef __LP64__
-       /* LP64todo - does the prealloc kmsg need ikm_header padding?
-        */
        if (IP_PREALLOC(dest_port)) {
+               mach_msg_size_t max_desc = 0;
+
                ip_lock(dest_port);
                if (!ip_active(dest_port)) {
                        ip_unlock(dest_port);
@@ -1254,19 +1264,26 @@ ipc_kmsg_get_from_kernel(
                }
                assert(IP_PREALLOC(dest_port));
                kmsg = dest_port->ip_premsg;
-               if (msg_and_trailer_size > kmsg->ikm_size) {
-                       ip_unlock(dest_port);
-                       return MACH_SEND_TOO_LARGE;
-               }
                if (ikm_prealloc_inuse(kmsg)) {
                        ip_unlock(dest_port);
                        return MACH_SEND_NO_BUFFER;
                }
+#if !defined(__LP64__)
+               if (msg->msgh_bits & MACH_MSGH_BITS_COMPLEX) {
+                       assert(size > sizeof(mach_msg_base_t));
+                       max_desc = ((mach_msg_base_t *)msg)->body.msgh_descriptor_count *
+                               DESC_SIZE_ADJUSTMENT;
+               }
+#endif
+               if (msg_and_trailer_size > kmsg->ikm_size - max_desc) {
+                       ip_unlock(dest_port);
+                       return MACH_SEND_TOO_LARGE;
+               }
                ikm_prealloc_set_inuse(kmsg, dest_port);
+               ikm_set_header(kmsg, msg_and_trailer_size);
                ip_unlock(dest_port);
        }
        else
-#endif /* !__LP64__ */
        {
                kmsg = ipc_kmsg_alloc(msg_and_trailer_size);
                if (kmsg == IKM_NULL)
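Two sizing fixes land in the hunks above: ipc_kmsg_alloc() now rejects msg_and_trailer_size strictly greater than MACH_MSG_SIZE_MAX - max_desc (the old >= was off by one), and the preallocated-kmsg path subtracts the worst-case descriptor expansion from the buffer capacity before comparing. Both rely on the subtract-before-compare idiom so the arithmetic cannot wrap; a small sketch under illustrative constants:

    /*
     * The subtract-before-compare idiom from the hunks above, isolated.
     * Testing size against (LIMIT - delta) keeps the addition itself
     * from wrapping; this assumes delta <= LIMIT, as the kernel's
     * constants guarantee.  LIMIT is illustrative, not the real
     * MACH_MSG_SIZE_MAX.
     */
    #include <stdint.h>
    #include <stdbool.h>

    #define LIMIT   0xffffff00U

    static bool
    expand_size_checked(uint32_t size, uint32_t delta, uint32_t *out)
    {
            if (size > LIMIT - delta)
                    return false;   /* would exceed LIMIT after expansion */
            *out = size + delta;
            return true;
    }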
index db4df8ad5a68e7ad043c6b7261f4e20246c4e18d..8687cafbfb115f2cc88fa0822d289f2ff4eca82c 100644 (file)
@@ -162,6 +162,12 @@ MACRO_BEGIN                                                                \
        assert((kmsg)->ikm_next == IKM_BOGUS);                          \
 MACRO_END
 
+#define ikm_set_header(kmsg, mtsize)                                   \
+MACRO_BEGIN                                                            \
+       (kmsg)->ikm_header = (mach_msg_header_t *)                      \
+       ((vm_offset_t)((kmsg) + 1) + (kmsg)->ikm_size - (mtsize));      \
+MACRO_END
+
 struct ipc_kmsg_queue {
        struct ipc_kmsg *ikmq_base;
 };
@@ -267,13 +273,16 @@ extern void ipc_kmsg_destroy(
 extern void ipc_kmsg_destroy_dest(
        ipc_kmsg_t      kmsg);
 
-
 /* Preallocate a kernel message buffer */
+extern ipc_kmsg_t ipc_kmsg_prealloc(
+       mach_msg_size_t size);
+
+/* bind a preallocated message buffer to a port */
 extern void ipc_kmsg_set_prealloc(
        ipc_kmsg_t      kmsg,
        ipc_port_t      port);
 
-/* Clear a kernel message buffer */
+/* Clear preallocated message buffer binding */
 extern void ipc_kmsg_clear_prealloc(
        ipc_kmsg_t      kmsg,
        ipc_port_t      port);
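The new ikm_set_header() macro centralizes the pointer arithmetic that ipc_kmsg_alloc() previously open-coded: the message header is placed so the message ends flush with the end of the kmsg buffer, leaving any slack in front of it for header expansion. A standalone sketch of the placement, with an abbreviated stand-in for struct ipc_kmsg:

    /*
     * Sketch of the ikm_set_header() placement arithmetic.  The struct
     * is an abbreviated stand-in for struct ipc_kmsg: ikm_size is the
     * payload capacity that follows this struct in the same allocation.
     */
    #include <stdint.h>

    struct kmsg {
            uint32_t  ikm_size;     /* payload bytes following this struct */
            void     *ikm_header;
    };

    static void
    set_header(struct kmsg *k, uint32_t msg_and_trailer_size)
    {
            /* Place the header so the message ends flush with the
             * buffer; unused capacity sits in front of it, available
             * for later in-place header expansion. */
            k->ikm_header =
                (char *)(k + 1) + k->ikm_size - msg_and_trailer_size;
    }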
index eaa7bad401df82d8aff9d1f72c31aafb5e81309e..76185c9ba74c8cde353a7c546b20efe19331e653 100644 (file)
@@ -107,6 +107,7 @@ decl_lck_mtx_data(, ipc_port_timestamp_lock_data)
 lck_mtx_ext_t  ipc_port_multiple_lock_data_ext;
 lck_mtx_ext_t  ipc_port_timestamp_lock_data_ext;
 ipc_port_timestamp_t   ipc_port_timestamp_data;
+int ipc_portbt;
 
 #if    MACH_ASSERT
 void   ipc_port_init_debug(
@@ -1235,8 +1236,14 @@ ipc_port_debug_init(void)
 {
        queue_init(&port_alloc_queue);
        lck_mtx_init_ext(&port_alloc_queue_lock, &port_alloc_queue_lock_ext, &ipc_lck_grp, &ipc_lck_attr);
+
+       if (!PE_parse_boot_argn("ipc_portbt", &ipc_portbt, sizeof (ipc_portbt)))
+               ipc_portbt = 0;
 }
 
+#ifdef MACH_BSD
+extern int proc_pid(struct proc*);
+#endif /* MACH_BSD */
 
 /*
  *     Initialize all of the debugging state in a port.
@@ -1255,12 +1262,22 @@ ipc_port_init_debug(
        for (i = 0; i < IP_NSPARES; ++i)
                port->ip_spares[i] = 0;
 
+#ifdef MACH_BSD
+       task_t task = current_task();
+       if (task != TASK_NULL) {
+               struct proc* proc = (struct proc*) get_bsdtask_info(task);
+               if (proc)
+                       port->ip_spares[0] = proc_pid(proc);
+       }
+#endif /* MACH_BSD */
+
        /*
         *      Machine-dependent routine to fill in an
         *      array with up to IP_CALLSTACK_MAX levels
         *      of return pc information.
         */
-       machine_callstack(&port->ip_callstack[0], IP_CALLSTACK_MAX);
+       if (ipc_portbt)
+               machine_callstack(&port->ip_callstack[0], IP_CALLSTACK_MAX);
 
 #if 0
        lck_mtx_lock(&port_alloc_queue_lock);
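The net effect of this hunk is that per-port creation backtraces become opt-in via the ipc_portbt boot-arg, since machine_callstack() is comparatively expensive on every port allocation, while the cheap pid tag in ip_spares[0] is recorded unconditionally. A userspace analogue of the opt-in gating, with getenv() standing in for PE_parse_boot_argn() and hypothetical names throughout:

    /*
     * Userspace analogue of the ipc_portbt gating above.  getenv()
     * stands in for PE_parse_boot_argn(); every name is hypothetical.
     */
    #include <stdlib.h>

    static int backtrace_enabled;

    static void
    debug_init(void)
    {
            const char *v = getenv("PORTBT");

            backtrace_enabled = (v != NULL && v[0] == '1');
    }

    static void
    on_port_create(void)
    {
            /* Cheap tagging would happen unconditionally here. */
            if (backtrace_enabled) {
                    /* capture_callstack(); expensive, so opt-in only */
            }
    }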
index 7249fe96a9be13e75fc05f9f020397f77e435396..4998a84bc6e860b3186daaae68767f3ad3d8de42 100644 (file)
@@ -150,8 +150,8 @@ struct ipc_port {
 #endif
 
 #if    MACH_ASSERT
-#define        IP_NSPARES              10
-#define        IP_CALLSTACK_MAX        10
+#define        IP_NSPARES              4
+#define        IP_CALLSTACK_MAX        16
        queue_chain_t   ip_port_links;  /* all allocated ports */
        thread_t        ip_thread;      /* who made me?  thread context */
        unsigned long   ip_timetrack;   /* give an idea of "when" created */
index 28d0fbc87d2fdc10c0be4d93f9bfd9a538f9e048..389e80bb1867c1313c16846197d52ddab8fa3668 100644 (file)
@@ -644,9 +644,11 @@ mach_port_allocate_full(
                        return KERN_RESOURCE_SHORTAGE;
                } else {
                        mach_msg_size_t size = qosp->len + MAX_TRAILER_SIZE;
+
                        if (right != MACH_PORT_RIGHT_RECEIVE)
                                return (KERN_INVALID_VALUE);
-                       kmsg = (ipc_kmsg_t)ipc_kmsg_alloc(size);
+
+                       kmsg = (ipc_kmsg_t)ipc_kmsg_prealloc(size);
                        if (kmsg == IKM_NULL)
                                return (KERN_RESOURCE_SHORTAGE);
                }
index dbacccfd85276f1c0aa275a0ba302b7f286ac8e5..df1d6d953d00ef9b7943e6c81f9db34b0fda4047 100644 (file)
 #include <mach/mach_types.h>
 #include <sys/appleapiopts.h>
 #include <kern/debug.h>
+#include <uuid/uuid.h>
 
 #include <kdp/kdp_internal.h>
 #include <kdp/kdp_private.h>
 #include <kdp/kdp_core.h>
+#include <kdp/kdp_dyld.h>
 
 #include <libsa/types.h>
 
@@ -115,6 +117,7 @@ int noresume_on_disconnect = 0;
 extern unsigned int return_on_panic;
 
 typedef struct thread_snapshot *thread_snapshot_t;
+typedef struct task_snapshot *task_snapshot_t;
 
 extern int
 machine_trace_thread(thread_t thread, char *tracepos, char *tracebound, int nframes, boolean_t user_p);
@@ -143,7 +146,7 @@ kdp_remove_breakpoint_internal(
 
 
 int
-kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, unsigned trace_options, uint32_t *pbytesTraced);
+kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, uint32_t trace_flags, uint32_t dispatch_offset, uint32_t *pbytesTraced);
 
 boolean_t kdp_copyin(pmap_t, uint64_t, void *, size_t);
 extern void bcopy_phys(addr64_t, addr64_t, vm_size_t);
@@ -1064,7 +1067,7 @@ kdp_copyin(pmap_t p, uint64_t uaddr, void *dest, size_t size) {
 }
 
 int
-kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, unsigned trace_options, uint32_t *pbytesTraced)
+kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, uint32_t trace_flags, uint32_t dispatch_offset, uint32_t *pbytesTraced)
 {
        char *tracepos = (char *) tracebuf;
        char *tracebound = tracepos + tracebuf_size;
@@ -1073,49 +1076,105 @@ kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, unsigned trace_op
 
        task_t task = TASK_NULL;
        thread_t thread = THREAD_NULL;
-       int nframes = trace_options;
        thread_snapshot_t tsnap = NULL;
        unsigned framesize = 2 * sizeof(vm_offset_t);
-       boolean_t dispatch_p = ((trace_options & STACKSHOT_GET_DQ) != 0);
-       uint16_t  dispatch_offset = (trace_options & STACKSHOT_DISPATCH_OFFSET_MASK) >> STACKSHOT_DISPATCH_OFFSET_SHIFT;
        struct task ctask;
        struct thread cthread;
-
-       if ((nframes <= 0) || nframes > MAX_FRAMES)
-               nframes = MAX_FRAMES;
+       
+       boolean_t dispatch_p = ((trace_flags & STACKSHOT_GET_DQ) != 0);
+       boolean_t save_loadinfo_p = ((trace_flags & STACKSHOT_SAVE_LOADINFO) != 0);
 
        queue_iterate(&tasks, task, task_t, tasks) {
+               int task_pid = pid_from_task(task);
+               boolean_t task64 = task_has_64BitAddr(task);
+
                if ((task == NULL) || (ml_nofault_copy((vm_offset_t) task, (vm_offset_t) &ctask, sizeof(struct task)) != sizeof(struct task)))
                        goto error_exit;
+
                /* Trace everything, unless a process was specified */
-               if ((pid == -1) || (pid == pid_from_task(task)))
+               if ((pid == -1) || (pid == task_pid)) {
+                       task_snapshot_t task_snap;
+                       uint32_t uuid_info_count;
+                       mach_vm_address_t uuid_info_addr;
+
+                       if (save_loadinfo_p && task_pid > 0) {
+                               // Read the dyld_all_image_infos struct from the task memory to get UUID array count and location
+                               if (task64) {
+                                       struct dyld_all_image_infos64 task_image_infos;
+                                       if (!kdp_copyin(task->map->pmap, task->all_image_info_addr, &task_image_infos, sizeof(struct dyld_all_image_infos64)))
+                                               goto error_exit;
+                                       uuid_info_count = (uint32_t)task_image_infos.uuidArrayCount;
+                                       uuid_info_addr = task_image_infos.uuidArray;
+                               } else {
+                                       struct dyld_all_image_infos task_image_infos;
+                                       if (!kdp_copyin(task->map->pmap, task->all_image_info_addr, &task_image_infos, sizeof(struct dyld_all_image_infos)))
+                                               goto error_exit;
+                                       uuid_info_count = task_image_infos.uuidArrayCount;
+                                       uuid_info_addr = task_image_infos.uuidArray;
+                               }
+                       } else {
+                               uuid_info_count = 0;
+                               uuid_info_addr = 0;
+                       }
+
+                       if (tracepos + sizeof(struct task_snapshot) > tracebound) {
+                               error = -1;
+                               goto error_exit;
+                       }
+
+                       task_snap = (task_snapshot_t) tracepos;
+                       task_snap->snapshot_magic = STACKSHOT_TASK_SNAPSHOT_MAGIC;
+                       task_snap->pid = task_pid;
+                       task_snap->nloadinfos = uuid_info_count;
+                       /* Add the BSD process identifiers */
+                       if (task_pid != -1)
+                               proc_name_kdp(task, task_snap->p_comm, sizeof(task_snap->p_comm));
+                       else
+                               task_snap->p_comm[0] = '\0';
+                       task_snap->ss_flags = 0;
+                       if (task64)
+                               task_snap->ss_flags |= kUser64_p;
+                       
+                       tracepos += sizeof(struct task_snapshot);
+
+                       if (task_pid > 0 && uuid_info_count > 0) {
+                               uint32_t uuid_info_size = (uint32_t)(task64 ? sizeof(struct dyld_uuid_info64) : sizeof(struct dyld_uuid_info));
+                               uint32_t uuid_info_array_size = uuid_info_count * uuid_info_size;
+
+                               if (tracepos + uuid_info_array_size > tracebound) {
+                                       error = -1;
+                                       goto error_exit;
+                               }
+
+                               // Copy in the UUID info array
+                               if (!kdp_copyin(task->map->pmap, uuid_info_addr, tracepos, uuid_info_array_size))
+                                       goto error_exit;
+
+                               tracepos += uuid_info_array_size;
+                       }
+
                        queue_iterate(&task->threads, thread, thread_t, task_threads){
                                if ((thread == NULL) || (ml_nofault_copy((vm_offset_t) thread, (vm_offset_t) &cthread, sizeof(struct thread)) != sizeof(struct thread)))
                                        goto error_exit;
+
                                if (((tracepos + 4 * sizeof(struct thread_snapshot)) > tracebound)) {
                                        error = -1;
                                        goto error_exit;
                                }
-/* Populate the thread snapshot header */
+                               /* Populate the thread snapshot header */
                                tsnap = (thread_snapshot_t) tracepos;
                                tsnap->thread_id = (uint64_t) (uintptr_t)thread;
                                tsnap->state = thread->state;
                                tsnap->wait_event = thread->wait_event;
                                tsnap->continuation = (uint64_t) (uintptr_t) thread->continuation;
-/* Add the BSD process identifiers */
-                               if ((tsnap->pid = pid_from_task(task)) != -1)
-                                       proc_name_kdp(task, tsnap->p_comm, sizeof(tsnap->p_comm));
-                               else
-                                       tsnap->p_comm[0] = '\0';
 
-                               tsnap->snapshot_magic = 0xfeedface;
+                               tsnap->snapshot_magic = STACKSHOT_THREAD_SNAPSHOT_MAGIC;
                                tracepos += sizeof(struct thread_snapshot);
                                tsnap->ss_flags = 0;
 
                                if (dispatch_p && (task != kernel_task) && (task->active) && (task->map)) {
                                        uint64_t dqkeyaddr = thread_dispatchqaddr(thread);
                                        if (dqkeyaddr != 0) {
-                                               boolean_t task64 = task_has_64BitAddr(task);
                                                uint64_t dqaddr = 0;
                                                if (kdp_copyin(task->map->pmap, dqkeyaddr, &dqaddr, (task64 ? 8 : 4)) && (dqaddr != 0)) {
                                                        uint64_t dqserialnumaddr = dqaddr + dispatch_offset;
@@ -1133,27 +1192,27 @@ kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, unsigned trace_op
  */
                                if (thread->kernel_stack != 0) {
 #if defined(__LP64__)                                  
-                                       tracebytes = machine_trace_thread64(thread, tracepos, tracebound, nframes, FALSE);
+                                       tracebytes = machine_trace_thread64(thread, tracepos, tracebound, MAX_FRAMES, FALSE);
                                        tsnap->ss_flags |= kKernel64_p;
                                        framesize = 16;
 #else
-                                       tracebytes = machine_trace_thread(thread, tracepos, tracebound, nframes, FALSE);
+                                       tracebytes = machine_trace_thread(thread, tracepos, tracebound, MAX_FRAMES, FALSE);
                                        framesize = 8;
 #endif
                                }
                                tsnap->nkern_frames = tracebytes/framesize;
                                tracepos += tracebytes;
                                tracebytes = 0;
-/* Trace user stack, if any */
+                               /* Trace user stack, if any */
                                if (thread->task->map != kernel_map) {
                                        /* 64-bit task? */
                                        if (task_has_64BitAddr(thread->task)) {
-                                               tracebytes = machine_trace_thread64(thread, tracepos, tracebound, nframes, TRUE);
+                                               tracebytes = machine_trace_thread64(thread, tracepos, tracebound, MAX_FRAMES, TRUE);
                                                tsnap->ss_flags |= kUser64_p;
                                                framesize = 16;
                                        }
                                        else {
-                                               tracebytes = machine_trace_thread(thread, tracepos, tracebound, nframes, TRUE);
+                                               tracebytes = machine_trace_thread(thread, tracepos, tracebound, MAX_FRAMES, TRUE);
                                                framesize = 8;
                                        }
                                }
@@ -1161,6 +1220,7 @@ kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, unsigned trace_op
                                tracepos += tracebytes;
                                tracebytes = 0;
                        }
+               }
        }
 
 error_exit:
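After this hunk the stackshot buffer is a stream of magic-tagged records: a task_snapshot per matched task, optionally followed by its dyld UUID array when STACKSHOT_SAVE_LOADINFO is set, then that task's thread_snapshots with their frame data. A consumer-side sketch of dispatching on the magics; record sizes are deliberately elided since the full packed layouts live in osfmk/kern/debug.h:

    /*
     * Consumer-side sketch of the record stream kdp_stackshot() now
     * emits.  A real consumer advances by the exact packed struct sizes
     * from osfmk/kern/debug.h plus per-record variable data (UUID
     * arrays, frame data); those sizes are elided here.
     */
    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    #define STACKSHOT_TASK_SNAPSHOT_MAGIC   0xdecafbad
    #define STACKSHOT_THREAD_SNAPSHOT_MAGIC 0xfeedface

    static void
    walk_snapshots(const char *pos, const char *end)
    {
            uint32_t magic;

            while (pos + sizeof(magic) <= end) {
                    memcpy(&magic, pos, sizeof(magic)); /* records are packed */

                    if (magic == STACKSHOT_TASK_SNAPSHOT_MAGIC) {
                            /* task_snapshot header, then nloadinfos dyld
                             * UUID entries sized by the kUser64_p flag */
                    } else if (magic == STACKSHOT_THREAD_SNAPSHOT_MAGIC) {
                            /* thread_snapshot header, then nkern_frames +
                             * nuser_frames frames of 8 or 16 bytes each */
                    } else {
                            fprintf(stderr, "bad record 0x%x\n", magic);
                    }
                    return; /* advancing pos needs the real struct sizes */
            }
    }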
diff --git a/osfmk/kdp/kdp_dyld.h b/osfmk/kdp/kdp_dyld.h
new file mode 100644 (file)
index 0000000..ef22857
--- /dev/null
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*
+ * Data structure definitions copied from dyld so that we can read dyld's saved UUID information
+ * for each binary image not loaded from the shared cache during stackshots.
+ */
+
+/* From dyld/include/dyld_images.h */
+
+struct dyld_uuid_info {
+       user32_addr_t   imageLoadAddress;       /* base address image is mapped into */
+       uuid_t                  imageUUID;                      /* UUID of image */
+};
+
+struct dyld_uuid_info64 {
+       user64_addr_t   imageLoadAddress;       /* base address image is mapped into */
+       uuid_t                  imageUUID;                      /* UUID of image */
+};
+
+// FIXME: dyld is in C++, and some of the fields in dyld_all_image_infos are C++ 
+// native booleans.  There must be a better way...
+typedef uint8_t bool;
+
+struct dyld_all_image_infos {
+       uint32_t                                        version;
+       uint32_t                                        infoArrayCount;
+       user32_addr_t                           infoArray;
+       user32_addr_t                           notification;
+       bool                                            processDetachedFromSharedRegion;
+       bool                                            libSystemInitialized;
+       user32_addr_t                           dyldImageLoadAddress;
+       user32_addr_t                           jitInfo;
+       user32_addr_t                           dyldVersion;
+       user32_addr_t                           errorMessage;
+       user32_addr_t                           terminationFlags;
+       user32_addr_t                           coreSymbolicationShmPage;
+       user32_addr_t                           systemOrderFlag;
+       user32_size_t                           uuidArrayCount; // dyld defines this as a uintptr_t despite it being a count
+       user32_addr_t                           uuidArray;
+};
+
+struct dyld_all_image_infos64 {
+       uint32_t                                        version;
+       uint32_t                                        infoArrayCount;
+       user64_addr_t                           infoArray;
+       user64_addr_t                           notification;
+       bool                                            processDetachedFromSharedRegion;
+       bool                                            libSystemInitialized;
+       user64_addr_t                           dyldImageLoadAddress;
+       user64_addr_t                           jitInfo;
+       user64_addr_t                           dyldVersion;
+       user64_addr_t                           errorMessage;
+       user64_addr_t                           terminationFlags;
+       user64_addr_t                           coreSymbolicationShmPage;
+       user64_addr_t                           systemOrderFlag;
+       user64_size_t                           uuidArrayCount; // dyld defines this as a uintptr_t despite it being a count
+       user64_addr_t                           uuidArray;
+};
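Since dyld publishes parallel 32- and 64-bit layouts, kdp_stackshot() picks the struct and the per-entry UUID record size from the task's address-space width before sizing the kdp_copyin(). A sketch of that size selection; the byte counts mirror the two structs above (a 32- or 64-bit load address plus a 16-byte UUID) but are computed here rather than taken from real headers:

    /*
     * Size selection for the dyld UUID copy described above.  The
     * arithmetic mirrors struct dyld_uuid_info / dyld_uuid_info64;
     * treat it as illustrative.
     */
    #include <stdint.h>
    #include <stdbool.h>
    #include <stddef.h>

    static size_t
    uuid_entry_size(bool task64)
    {
            return (task64 ? sizeof(uint64_t) : sizeof(uint32_t)) + 16;
    }

    static size_t
    uuid_array_bytes(bool task64, uint32_t uuid_info_count)
    {
            /* kdp_stackshot() bounds-checks this against the buffer
             * end before calling kdp_copyin() with it. */
            return (size_t)uuid_info_count * uuid_entry_size(task64);
    }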
index 5cab18769328e1e3a7fcd19a74fafa08a1cc2878..0a54c5f2e22ee8f068eae6bea32a685ff6391093 100644 (file)
@@ -195,20 +195,21 @@ static unsigned stack_snapshot_bytes_traced = 0;
 static void *stack_snapshot_buf;
 static uint32_t stack_snapshot_bufsize;
 static int stack_snapshot_pid;
-static uint32_t stack_snapshot_options;
+static uint32_t stack_snapshot_flags;
+static uint32_t stack_snapshot_dispatch_offset;
 
 static unsigned int old_debugger;
 
 void
 kdp_snapshot_preflight(int pid, void * tracebuf, uint32_t tracebuf_size,
-    uint32_t options);
+    uint32_t flags, uint32_t dispatch_offset);
 
 void
 kdp_snapshot_postflight(void);
 
 extern int
 kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size,
-    unsigned trace_options, uint32_t *pbytesTraced);
+    uint32_t flags, uint32_t dispatch_offset, uint32_t *pbytesTraced);
 
 int
 kdp_stack_snapshot_geterror(void);
@@ -308,12 +309,13 @@ kdp_unregister_send_receive(
 
 /* Cache stack snapshot parameters in preparation for a trace */
 void
-kdp_snapshot_preflight(int pid, void * tracebuf, uint32_t tracebuf_size, uint32_t options)
+kdp_snapshot_preflight(int pid, void * tracebuf, uint32_t tracebuf_size, uint32_t flags, uint32_t dispatch_offset)
 {
        stack_snapshot_pid = pid;
        stack_snapshot_buf = tracebuf;
        stack_snapshot_bufsize = tracebuf_size;
-       stack_snapshot_options = options;
+       stack_snapshot_flags = flags;
+       stack_snapshot_dispatch_offset = dispatch_offset;
        kdp_snapshot++;
        /* Mark this debugger as active, since the polled mode driver that 
         * ordinarily does this may not be enabled (yet), or since KDB may be
@@ -1114,7 +1116,8 @@ kdp_raise_exception(
     if (kdp_snapshot && (!panic_active()) && (panic_caller == 0)) {
            stack_snapshot_ret = kdp_stackshot(stack_snapshot_pid,
            stack_snapshot_buf, stack_snapshot_bufsize,
-           stack_snapshot_options, &stack_snapshot_bytes_traced);
+           stack_snapshot_flags, stack_snapshot_dispatch_offset, 
+               &stack_snapshot_bytes_traced);
            return;
     }
 
index acec7297915a47085b89721801a997908b49c354..39aa1f425972716180df81c687d0f18a59145874 100644 (file)
@@ -431,16 +431,19 @@ extern const char version[];
 extern char osversion[];
 
 __private_extern__ void panic_display_system_configuration(void) {
-       static boolean_t config_displayed = FALSE;
+       static volatile boolean_t config_displayed = FALSE;
 
        panic_display_process_name();
        if (config_displayed == FALSE) {
+               config_displayed = TRUE;
                kdb_printf("\nMac OS version:\n%s\n",
                    (osversion[0] != 0) ? osversion : "Not yet set");
                kdb_printf("\nKernel version:\n%s\n",version);
                panic_display_model_name();
                panic_display_uptime();
-               config_displayed = TRUE;
+#if    defined(__i386__) || defined(__x86_64__)
+               pmap_pagetable_corruption_msg_log(&kdb_printf);
+#endif /* i386 || x86_64 */
                panic_display_zprint();
                kext_dump_panic_lists(&kdb_log);
        }
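Note the reordering in the hunk above: config_displayed becomes volatile and is set TRUE before any printing, so a nested panic taken while these kdb_printf() calls run falls through instead of recursing into the same block. The guard pattern in isolation:

    /*
     * The re-entrancy guard above, in isolation: flip the volatile
     * flag before doing the work, so a fault inside do_report() that
     * panics back into this function falls through instead of
     * recursing.
     */
    #include <stdbool.h>

    static volatile bool reported;

    static void
    report_once(void)
    {
            if (!reported) {
                    reported = true;        /* set first, then work */
                    /* do_report(); may itself fault and re-enter */
            }
    }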
index d4ad172b9eb281f489d5fda3ffefd439834f3e8d..308435ece684501fc7a48d4f017dd3f6a63253fc 100644 (file)
@@ -39,12 +39,18 @@ struct thread_snapshot {
        uint32_t                snapshot_magic;
        uint32_t                nkern_frames;
        uint32_t                nuser_frames;
-       int32_t                 pid;
        uint64_t                wait_event;
        uint64_t                continuation;
        uint64_t                thread_id;
        int32_t                 state;
        char                    ss_flags;
+} __attribute__ ((packed));
+
+struct task_snapshot {
+       uint32_t                snapshot_magic;
+       int32_t                 pid;
+       uint32_t                nloadinfos;
+       char                    ss_flags;
        /* We restrict ourselves to a statically defined
         * (current as of 2009) length for the
         * p_comm string, due to scoping issues (osfmk/bsd and user/kernel
@@ -59,9 +65,13 @@ enum {
        kHasDispatchSerial = 0x4
 };
 
-enum   {STACKSHOT_GET_DQ = 1};
-#define STACKSHOT_DISPATCH_OFFSET_MASK 0xffff0000
-#define STACKSHOT_DISPATCH_OFFSET_SHIFT 16 
+enum {
+    STACKSHOT_GET_DQ = 0x1,
+    STACKSHOT_SAVE_LOADINFO = 0x2
+};
+
+#define STACKSHOT_THREAD_SNAPSHOT_MAGIC 0xfeedface
+#define STACKSHOT_TASK_SNAPSHOT_MAGIC 0xdecafbad
 
 #endif /* __APPLE_API_UNSTABLE */
 #endif /* __APPLE_API_PRIVATE */
@@ -70,6 +80,7 @@ enum  {STACKSHOT_GET_DQ = 1};
 
 extern unsigned int    systemLogDiags;
 extern char debug_buf[];
+extern unsigned int    debug_boot_arg;
 
 #ifdef MACH_KERNEL_PRIVATE
 
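Both snapshot structs above are now __attribute__ ((packed)), which pins their field offsets for out-of-kernel consumers regardless of natural alignment. A toy illustration of what packing buys:

    /*
     * Toy illustration of the packed attribute above: a uint64_t after
     * a uint32_t would otherwise usually pull in padding, and external
     * parsers would disagree about field offsets.
     */
    #include <stdint.h>
    #include <assert.h>

    struct plain  { uint32_t a; uint64_t b; };
    struct packed { uint32_t a; uint64_t b; } __attribute__ ((packed));

    int
    main(void)
    {
            assert(sizeof(struct packed) == 12);    /* 4 + 8, no padding */
            assert(sizeof(struct plain) >= 12);     /* 16 on most ABIs */
            return 0;
    }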
index 3410697249f5369eaedac9c4073475550f0fe061..ca65ceca63674b73db6d379aa313b2c54f25382e 100644 (file)
@@ -155,6 +155,15 @@ processor_init(
        processor_data_init(processor);
        processor->processor_list = NULL;
 
+       pset_lock(pset);
+       if (pset->cpu_set_count++ == 0)
+               pset->cpu_set_low = pset->cpu_set_hi = cpu_id;
+       else {
+               pset->cpu_set_low = (cpu_id < pset->cpu_set_low)? cpu_id: pset->cpu_set_low;
+               pset->cpu_set_hi = (cpu_id > pset->cpu_set_hi)? cpu_id: pset->cpu_set_hi;
+       }
+       pset_unlock(pset);
+
        simple_lock(&processor_list_lock);
        if (processor_list == NULL)
                processor_list = processor;
@@ -231,6 +240,8 @@ pset_init(
        queue_init(&pset->idle_queue);
        pset->processor_count = 0;
        pset->low_pri = pset->low_count = PROCESSOR_NULL;
+       pset->cpu_set_low = pset->cpu_set_hi = 0;
+       pset->cpu_set_count = 0;
        pset_lock_init(pset);
        pset->pset_self = IP_NULL;
        pset->pset_name_self = IP_NULL;
index fcf61d0444612d7e04fa4dbfc64d74bbd1c461f5..342a90081ef53999391ce7e7be8c6c7120c03944 100644 (file)
@@ -89,6 +89,9 @@ struct processor_set {
 
        int                                     processor_count;
 
+       int                                     cpu_set_low, cpu_set_hi;
+       int                                     cpu_set_count;
+
        decl_simple_lock_data(,sched_lock)      /* lock for above */
 
        struct ipc_port *       pset_self;              /* port for operations */
@@ -244,11 +247,15 @@ extern kern_return_t      processor_info_count(
 #define pset_deallocate(x)
 #define pset_reference(x)
 
-extern void                    machine_run_count(
-                                               uint32_t        count);
+extern void                            machine_run_count(
+                                                       uint32_t        count);
+
+extern boolean_t               machine_processor_is_inactive(
+                                                       processor_t                     processor);
 
-extern boolean_t       machine_cpu_is_inactive(
-                                               int                     cpu_id);
+extern processor_t             machine_choose_processor(
+                                                       processor_set_t         pset,
+                                                       processor_t                     processor);
 
 #else  /* MACH_KERNEL_PRIVATE */
 
index e1e5ae4c0272b9ee64f4393287f731377ef2e4d0..9a153ea2921bf391b285b6a9e20f32e9e0987653 100644 (file)
 #define BASEPRI_FOREGROUND     (BASEPRI_DEFAULT + 16)                          /* 47 */
 #define BASEPRI_BACKGROUND     (BASEPRI_DEFAULT + 15)                          /* 46 */
 #define BASEPRI_DEFAULT                (MAXPRI_USER - (NRQS / 4))                      /* 31 */
 #define BASEPRI_FOREGROUND     (BASEPRI_DEFAULT + 16)                          /* 47 */
 #define BASEPRI_BACKGROUND     (BASEPRI_DEFAULT + 15)                          /* 46 */
 #define BASEPRI_DEFAULT                (MAXPRI_USER - (NRQS / 4))                      /* 31 */
+#define MAXPRI_THROTTLE                (MINPRI + 4)                                            /*  4 */
 #define MINPRI_USER                    MINPRI                                                          /*  0 */
 
 /*
 #define MINPRI_USER                    MINPRI                                                          /*  0 */
 
 /*
index 2dc656aecaf6d703eed9d737fcb99f6f72ff9bef..60191650c365406ed305e2448ae30bb843331f03 100644 (file)
@@ -1104,7 +1104,7 @@ thread_select(
 
                pset_lock(pset);
 
-               inactive_state = processor->state != PROCESSOR_SHUTDOWN && machine_cpu_is_inactive(processor->cpu_id);
+               inactive_state = processor->state != PROCESSOR_SHUTDOWN && machine_processor_is_inactive(processor);
 
                simple_lock(&rt_lock);
 
@@ -1680,8 +1680,7 @@ thread_dispatch(
                                        thread->realtime.deadline = UINT64_MAX;
                                        thread->reason |= AST_QUANTUM;
                                }
-                       }
-                       else {
+                       } else {
                                /*
                                 *      For non-realtime threads treat a tiny
                                 *      remaining quantum as an expired quantum
@@ -1726,12 +1725,25 @@ thread_dispatch(
                                /*
                                 *      Waiting.
                                 */
+                               boolean_t should_terminate = FALSE;
+
+                               /* Only the first call to thread_dispatch
+                                * after explicit termination should add
+                                * the thread to the termination queue
+                                */
+                               if ((thread->state & (TH_TERMINATE|TH_TERMINATE2)) == TH_TERMINATE) {
+                                       should_terminate = TRUE;
+                                       thread->state |= TH_TERMINATE2;
+                               }
+
                                thread->state &= ~TH_RUN;
 
                                if (thread->sched_mode & TH_MODE_TIMESHARE)
                                        sched_share_decr();
                                sched_run_decr();
 
+                               (*thread->sched_call)(SCHED_CALL_BLOCK, thread);
+
                                if (thread->wake_active) {
                                        thread->wake_active = FALSE;
                                        thread_unlock(thread);
@@ -1743,9 +1755,7 @@ thread_dispatch(
 
                                wake_unlock(thread);
 
-                               (*thread->sched_call)(SCHED_CALL_BLOCK, thread);
-
-                               if (thread->state & TH_TERMINATE)
+                               if (should_terminate)
                                        thread_terminate_enqueue(thread);
                        }
                }
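The TH_TERMINATE/TH_TERMINATE2 pairing introduced above makes the termination enqueue happen exactly once: only the dispatch that sees TERMINATE without TERMINATE2 enqueues the thread, and it sets TERMINATE2 in the same locked region so later dispatches skip it. The flag logic in isolation (the real code holds the thread lock across the test and set):

    /*
     * The two-flag "enqueue exactly once" logic above, in isolation.
     * TH_TERMINATE records the request; TH_TERMINATE2 records that the
     * enqueue already happened.  Locking is elided here.
     */
    #include <stdbool.h>

    #define TH_TERMINATE    0x10
    #define TH_TERMINATE2   0x20

    static bool
    claim_termination_enqueue(int *state)
    {
            if ((*state & (TH_TERMINATE | TH_TERMINATE2)) == TH_TERMINATE) {
                    *state |= TH_TERMINATE2;
                    return true;    /* this caller does the enqueue */
            }
            return false;           /* already done, or not requested */
    }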
@@ -2232,6 +2242,7 @@ choose_next_pset(
  *     choose_processor:
  *
  *     Choose a processor for the thread, beginning at
+ *     the pset.  Accepts an optional processor hint in
  *     the pset.
  *
  *     Returns a processor, possibly from a different pset.
@@ -2242,19 +2253,25 @@ choose_next_pset(
 static processor_t
 choose_processor(
        processor_set_t         pset,
+       processor_t                     processor,
        thread_t                        thread)
 {
        processor_set_t         nset, cset = pset;
-       processor_t                     processor = thread->last_processor;
        processor_meta_t        pmeta = PROCESSOR_META_NULL;
 
        /*
-        *      Prefer the last processor, when appropriate.
+        *      Prefer the hinted processor, when appropriate.
         */
        if (processor != PROCESSOR_NULL) {
+               processor_t                     mprocessor;
+
                if (processor->processor_meta != PROCESSOR_META_NULL)
                        processor = processor->processor_meta->primary;
 
+               mprocessor = machine_choose_processor(pset, processor);
+               if (mprocessor != PROCESSOR_NULL)
+                       processor = mprocessor;
+
                if (processor->processor_set != pset || processor->state == PROCESSOR_INACTIVE ||
                                processor->state == PROCESSOR_SHUTDOWN || processor->state == PROCESSOR_OFF_LINE)
                        processor = PROCESSOR_NULL;
@@ -2262,6 +2279,18 @@ choose_processor(
                if (processor->state == PROCESSOR_IDLE)
                        return (processor);
        }
+       else {
+               processor = machine_choose_processor(pset, processor);
+
+               if (processor != PROCESSOR_NULL) {
+                       if (processor->processor_set != pset || processor->state == PROCESSOR_INACTIVE ||
+                                       processor->state == PROCESSOR_SHUTDOWN || processor->state == PROCESSOR_OFF_LINE)
+                               processor = PROCESSOR_NULL;
+                       else
+                               if (processor->state == PROCESSOR_IDLE)
+                                       return (processor);
+               }
+       }
 
        /*
         *      Iterate through the processor sets to locate
@@ -2447,7 +2476,7 @@ thread_setrun(
                        pset = thread->affinity_set->aset_pset;
                        pset_lock(pset);
 
-                       processor = choose_processor(pset, thread);
+                       processor = choose_processor(pset, PROCESSOR_NULL, thread);
                }
                else
                if (thread->last_processor != PROCESSOR_NULL) {
@@ -2468,10 +2497,10 @@ thread_setrun(
                                 */
                                if (thread->sched_pri <= processor->current_pri ||
                                                thread->realtime.deadline >= processor->deadline)
-                                       processor = choose_processor(pset, thread);
+                                       processor = choose_processor(pset, PROCESSOR_NULL, thread);
                        }
                        else
-                               processor = choose_processor(pset, thread);
+                               processor = choose_processor(pset, processor, thread);
                }
                else {
                        /*
@@ -2489,7 +2518,7 @@ thread_setrun(
                        pset = choose_next_pset(pset);
                        pset_lock(pset);
 
-                       processor = choose_processor(pset, thread);
+                       processor = choose_processor(pset, PROCESSOR_NULL, thread);
                        task->pset_hint = processor->processor_set;
                }
        }
@@ -2645,7 +2674,7 @@ csw_check(
                                processor->processor_meta->primary != processor)
                return (AST_PREEMPT);
 
-       if (machine_cpu_is_inactive(processor->cpu_id))
+       if (machine_processor_is_inactive(processor))
                return (AST_PREEMPT);
 
        if (processor->active_thread->state & TH_SUSP)
@@ -2925,7 +2954,7 @@ processor_idle(
 
                (void)splsched();
 
-               if (processor->state == PROCESSOR_INACTIVE && !machine_cpu_is_inactive(processor->cpu_id))
+               if (processor->state == PROCESSOR_INACTIVE && !machine_processor_is_inactive(processor))
                        break;
        }
 
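choose_processor() now takes an explicit hint rather than always reading thread->last_processor, and both branches validate it identically: the hint (possibly replaced by machine_choose_processor()) is dropped unless it belongs to the pset and is in a usable state, and an idle hint is accepted immediately. A condensed sketch of that validation with simplified stand-in types:

    /*
     * Condensed sketch of the hint validation in choose_processor()
     * above, using simplified stand-ins for processor_t and
     * processor_set_t.
     */
    #include <stddef.h>

    struct pset;

    enum pstate { P_IDLE, P_RUNNING, P_INACTIVE, P_SHUTDOWN, P_OFF_LINE };

    struct cpu {
            enum pstate  state;
            struct pset *pset;
    };

    static struct cpu *
    validate_hint(struct pset *pset, struct cpu *hint)
    {
            if (hint == NULL)
                    return NULL;
            /* Drop hints outside this pset or in an unusable state. */
            if (hint->pset != pset || hint->state == P_INACTIVE ||
                hint->state == P_SHUTDOWN || hint->state == P_OFF_LINE)
                    return NULL;
            return hint;    /* caller takes an idle hint immediately */
    }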
index d47b67c528839f23f6467715fc9400d3ceebfc94..9f1c953473eeb0ed74890e5dfcf3b03bf15d81d3 100644 (file)
@@ -159,9 +159,6 @@ extern void         idle_thread(void);
 extern kern_return_t   idle_thread_create(
                                                        processor_t             processor);
 
-/* Start thread running */
-extern void            thread_bootstrap_return(void);
-
 /* Continuation return from syscall */
 extern void     thread_syscall_return(
                         kern_return_t   ret);
@@ -225,6 +222,9 @@ extern kern_return_t clear_wait(
                                                thread_t                thread,
                                                wait_result_t   result);
 
+/* Start thread running */
+extern void            thread_bootstrap_return(void);
+
 /* Return from exception (BSD-visible interface) */
 extern void            thread_exception_return(void) __dead2;
 
index 0f027820d988feb6fec35b2aff12ca9e53b02e0f..d3395ddb452030978fe6c4a3fa73751d5ff679e2 100644 (file)
@@ -105,6 +105,16 @@ task_policy_set(
                                task->role = info->role;
                        }
                }
+               else
+               if (info->role == TASK_THROTTLE_APPLICATION) {
+                       task_priority(task, MAXPRI_THROTTLE, MAXPRI_THROTTLE);
+                       task->role = info->role;
+               }
+               else
+               if (info->role == TASK_DEFAULT_APPLICATION) {
+                       task_priority(task, BASEPRI_DEFAULT, MAXPRI_USER);
+                       task->role = info->role;
+               }
                else
                        result = KERN_INVALID_ARGUMENT;
 
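The two new roles map directly onto the sched.h priority bands: TASK_THROTTLE_APPLICATION clamps both the priority and its ceiling to MAXPRI_THROTTLE (4), while TASK_DEFAULT_APPLICATION restores BASEPRI_DEFAULT (31) with a MAXPRI_USER ceiling. A sketch of how a client might request throttling, assuming the existing task_category_policy flavor from <mach/task_policy.h> carries the role:

    /*
     * Sketch of requesting the new throttle role, assuming the
     * existing task_category_policy flavor carries it; entitlement and
     * permission questions are outside this sketch's scope.
     */
    #include <mach/mach.h>
    #include <mach/task_policy.h>

    static kern_return_t
    throttle_task(task_t task)
    {
            task_category_policy_data_t info;

            info.role = TASK_THROTTLE_APPLICATION;
            return task_policy_set(task, TASK_CATEGORY_POLICY,
                (task_policy_t)&info, TASK_CATEGORY_POLICY_COUNT);
    }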
index b33a7d2be63d7e44cd6ef21b0380b246a0de7085..581a37c7ffa9637dbf7b685b80efbf8dd98a65d8 100644 (file)
@@ -822,6 +822,7 @@ thread_create_running(
 kern_return_t
 thread_create_workq(
        task_t                          task,
+       thread_continue_t               thread_return,
        thread_t                        *new_thread)
 {
        kern_return_t           result;
@@ -830,8 +831,7 @@ thread_create_workq(
        if (task == TASK_NULL || task == kernel_task)
                return (KERN_INVALID_ARGUMENT);
 
-       result = thread_create_internal(task, -1, (thread_continue_t)thread_bootstrap_return,
-                                                                                                       TH_OPTION_NOCRED | TH_OPTION_NOSUSP, &thread);
+       result = thread_create_internal(task, -1, thread_return, TH_OPTION_NOCRED | TH_OPTION_NOSUSP, &thread);
        if (result != KERN_SUCCESS)
                return (result);
 
index 61217f52e283a06d5c154db33a2d6e73e0df218a..db2c6e3520085375b9db019684a92f960cdda985 100644 (file)
@@ -178,6 +178,7 @@ struct thread {
 #define TH_RUN                 0x04                    /* running or on runq */
 #define TH_UNINT               0x08                    /* waiting uninteruptibly */
 #define        TH_TERMINATE    0x10                    /* halted at termination */
+#define        TH_TERMINATE2   0x20                    /* added to termination queue */
 
 #define TH_IDLE                        0x80                    /* idling processor */
 
@@ -640,6 +641,7 @@ __BEGIN_DECLS
 
 extern kern_return_t   thread_create_workq(
                                                        task_t                  task,
+                                                       thread_continue_t       thread_return,
                                                        thread_t                *new_thread);
 
 extern void    thread_yield_internal(
index ab9bab486a5f237e4346e20768bd78ae906aa023..92f0b642b9d15503020e79d1846da479ef3be081 100644 (file)
@@ -59,6 +59,7 @@ struct thread_call_group {
        timer_call_data_t       delayed_timer;
 
        struct wait_queue       idle_wqueue;
+       struct wait_queue       daemon_wqueue;
        uint32_t                        idle_count, active_count;
 };
 
@@ -149,6 +150,7 @@ thread_call_initialize(void)
        timer_call_setup(&group->delayed_timer, thread_call_delayed_timer, group);
 
        wait_queue_init(&group->idle_wqueue, SYNC_POLICY_FIFO);
+       wait_queue_init(&group->daemon_wqueue, SYNC_POLICY_FIFO);
 
     queue_init(&thread_call_internal_queue);
     for (
@@ -772,7 +774,7 @@ thread_call_wake(
        else
        if (!thread_call_daemon_awake) {
                thread_call_daemon_awake = TRUE;
-               thread_wakeup_one(&thread_call_daemon_awake);
+               wait_queue_wakeup_one(&group->daemon_wqueue, NULL, THREAD_AWAKENED);
        }
 }
 
@@ -901,8 +903,8 @@ thread_call_daemon_continue(
                simple_lock(&thread_call_lock);
     }
 
-       thread_call_daemon_awake = FALSE;
-    assert_wait(&thread_call_daemon_awake, THREAD_UNINT);
+    thread_call_daemon_awake = FALSE;
+    wait_queue_assert_wait(&group->daemon_wqueue, NULL, THREAD_UNINT, 0);
     
     simple_unlock(&thread_call_lock);
        (void) spllo();
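Editor's note: the thread_call hunks replace assert_wait()/thread_wakeup_one() on the address of thread_call_daemon_awake with a dedicated per-group wait queue, plausibly to keep this hot wakeup out of the shared global event hash. Waiter and waker must name the same queue and event; condensed from the hunks above, with thread_call_lock held on entry to each side:

    /* waiter, in thread_call_daemon_continue(): */
    thread_call_daemon_awake = FALSE;
    wait_queue_assert_wait(&group->daemon_wqueue, NULL, THREAD_UNINT, 0);
    simple_unlock(&thread_call_lock);
    (void) spllo();
    thread_block((thread_continue_t)thread_call_daemon_continue);

    /* waker, in thread_call_wake(): */
    if (!thread_call_daemon_awake) {
        thread_call_daemon_awake = TRUE;
        wait_queue_wakeup_one(&group->daemon_wqueue, NULL, THREAD_AWAKENED);
    }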
index a07f4c98fb61e049d0865247ec2a073a08ee7598..3a2fb39c4415b37ea5df7409adc6e976d60dee37 100644 (file)
@@ -109,7 +109,9 @@ enum task_role {
        TASK_FOREGROUND_APPLICATION,
        TASK_BACKGROUND_APPLICATION,
        TASK_CONTROL_APPLICATION,
-       TASK_GRAPHICS_SERVER
+       TASK_GRAPHICS_SERVER,
+       TASK_THROTTLE_APPLICATION,
+       TASK_DEFAULT_APPLICATION
 };
 
 typedef enum task_role         task_role_t;
index eed5107711edad8a31ddd9dd63362f387e01c416..6fe17d43cc53a69e4e9d09b033db062bfcf9bba9 100644 (file)
@@ -130,4 +130,12 @@ typedef int                vm_prot_t;
 #define VM_PROT_WANTS_COPY     ((vm_prot_t) 0x10)
 
 
+/*
+ *     The caller wants this memory region treated as if it had a valid
+ *     code signature.
+ */
+
+#define VM_PROT_TRUSTED                ((vm_prot_t) 0x20)
+
+
 #endif /* _MACH_VM_PROT_H_ */
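Editor's note: VM_PROT_TRUSTED is a request flag rather than a hardware protection; a caller ORs it into a vm_prot_t to ask that the mapped pages be treated as carrying a valid code signature (it pairs with the CONFIG_DYNAMIC_CODE_SIGNING additions to vm_map.c below). A one-line usage sketch, with the consuming mapping call elided since it is not part of this diff:

    vm_prot_t prot = VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_TRUSTED;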
index 7edacae01318a0e841cb00b1f1b44990d396642e..bc79f0c7c70fe66ec96180e9ee366a5a23b23456 100644 (file)
@@ -820,11 +820,17 @@ machine_run_count(__unused uint32_t count)
 }
 
 boolean_t
-machine_cpu_is_inactive(__unused int num)
+machine_processor_is_inactive(__unused processor_t processor)
 {
     return(FALSE);
 }
 
+processor_t
+machine_choose_processor(__unused processor_set_t pset, processor_t processor)
+{
+    return (processor);
+}
+
 vm_offset_t ml_stack_remaining(void)
 {
        uintptr_t local = (uintptr_t) &local;
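Editor's note: machine_processor_is_inactive() now takes a processor_t instead of a raw CPU number, and the new machine_choose_processor() hook lets platform power-management code bias or redirect the scheduler's choice; the stub above is the identity default. A sketch of a scheduler-side consultation (surrounding logic paraphrased, not the literal sched_prim.c code):

    /* after the scheduler picks a candidate processor for a pset: */
    processor = machine_choose_processor(pset, processor); /* platform bias hook */
    if (processor != PROCESSOR_NULL && machine_processor_is_inactive(processor))
        processor = PROCESSOR_NULL;   /* illustrative: force another pass */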
index cc652d4f8270f7d3aaa30a65cb3d663cdb4ac1aa..b339dbd7d515bdeac804da4b0068e2cc44981242 100644 (file)
@@ -2351,7 +2351,7 @@ vm_fault_enter(vm_page_t m,
                        /* Page might have been tainted before or not; now it
                         * definitively is. If the page wasn't tainted, we must
                         * disconnect it from all pmaps later. */
-                       must_disconnect = ~m->cs_tainted;
+                       must_disconnect = !m->cs_tainted;
                        m->cs_tainted = TRUE;
                        cs_enter_tainted_accepted++;
                }
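Editor's note: the one-character fix above matters because cs_tainted is a one-bit flag and bitwise NOT of a 0-or-1 value is nonzero either way (~0 is -1, ~1 is -2 after integer promotion), so must_disconnect was effectively always true; logical NOT yields the intended "disconnect only if not already tainted". A standalone demonstration:

    #include <stdio.h>

    int main(void)
    {
        int cs_tainted = 1;          /* page was already tainted */
        int wrong = ~cs_tainted;     /* -2: still truthy */
        int right = !cs_tainted;     /* 0: no disconnect needed */
        printf("bitwise: %d, logical: %d\n", wrong, right);
        return 0;
    }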
index d48a044fa4a718c21776706856bdc25616c86fb7..a0f5e8c9b07fc7a2a75eb431f1098c2a1b55425a 100644 (file)
@@ -4704,6 +4704,8 @@ vm_map_submap_pmap_clean(
 
        submap_end = offset + (end - start);
        submap_start = offset;
+
+       vm_map_lock_read(sub_map);
        if(vm_map_lookup_entry(sub_map, offset, &entry)) {
                
                remove_size = (entry->vme_end - entry->vme_start);
@@ -4775,7 +4777,8 @@ vm_map_submap_pmap_clean(
                        }
                }
                entry = entry->vme_next;
-       } 
+       }
+       vm_map_unlock_read(sub_map);
        return;
 }
 
@@ -12547,3 +12550,95 @@ void vm_map_switch_protect(vm_map_t    map,
        map->switch_protect=val;
        vm_map_unlock(map);
 }
+
+/* Add (generate) code signature for memory range */
+#if CONFIG_DYNAMIC_CODE_SIGNING
+kern_return_t vm_map_sign(vm_map_t map, 
+                vm_map_offset_t start, 
+                vm_map_offset_t end)
+{
+       vm_map_entry_t entry;
+       vm_page_t m;
+       vm_object_t object;
+       
+       /*
+        * Vet all the input parameters and current type and state of the
+        * underlying object.  Return with an error if anything is amiss.
+        */
+       if (map == VM_MAP_NULL)
+               return(KERN_INVALID_ARGUMENT);
+               
+       vm_map_lock_read(map);
+       
+       if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
+               /*
+                * Must pass a valid non-submap address.
+                */
+               vm_map_unlock_read(map);
+               return(KERN_INVALID_ADDRESS);
+       }
+       
+       if((entry->vme_start > start) || (entry->vme_end < end)) {
+               /*
+                * Map entry doesn't cover the requested range. Not handling
+                * this situation currently.
+                */
+               vm_map_unlock_read(map);
+               return(KERN_INVALID_ARGUMENT);
+       }
+       
+       object = entry->object.vm_object;
+       if (object == VM_OBJECT_NULL) {
+               /*
+                * Object must already be present or we can't sign.
+                */
+               vm_map_unlock_read(map);
+               return KERN_INVALID_ARGUMENT;
+       }
+       
+       vm_object_lock(object);
+       vm_map_unlock_read(map);
+       
+       while(start < end) {
+               uint32_t refmod;
+               
+               m = vm_page_lookup(object, start - entry->vme_start + entry->offset );
+               if (m==VM_PAGE_NULL) {
+                       /* should we try to fault a page here? we can probably
+                        * demand it exists and is locked for this request */
+                       vm_object_unlock(object);
+                       return KERN_FAILURE;
+               }
+               /* deal with special page status */
+               if (m->busy || 
+                   (m->unusual && (m->error || m->restart || m->private || m->absent))) {
+                       vm_object_unlock(object);
+                       return KERN_FAILURE;
+               }
+               
+               /* Page is OK... now "validate" it */
+               /* This is the place where we'll call out to create a code 
+                * directory, later */
+               m->cs_validated = TRUE;
+
+               /* The page is now "clean" for codesigning purposes. That means
+                * we don't consider it as modified (wpmapped) anymore. But 
+                * we'll disconnect the page so we note any future modification
+                * attempts. */
+               m->wpmapped = FALSE;
+               refmod = pmap_disconnect(m->phys_page);
+               
+               /* Pull the dirty status from the pmap, since we cleared the 
+                * wpmapped bit */
+               if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
+                       m->dirty = TRUE;
+               }
+               
+               /* On to the next page */
+               start += PAGE_SIZE;
+       }
+       vm_object_unlock(object);
+       
+       return KERN_SUCCESS;
+}
+#endif
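Editor's note: a hypothetical in-kernel caller of the new vm_map_sign(), to make the contract explicit: the range must fall within a single non-submap entry and every page must be resident and quiescent, otherwise the call fails rather than faulting pages in:

    #if CONFIG_DYNAMIC_CODE_SIGNING
    /* Sketch: mark a freshly JIT-written region of `map' as dynamically
     * signed. Helper name and context are illustrative. */
    kern_return_t
    sign_jit_region(vm_map_t map, vm_map_offset_t start, vm_map_size_t size)
    {
        return vm_map_sign(map,
                           vm_map_trunc_page(start),
                           vm_map_round_page(start + size));
    }
    #endif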
index f520087ed448f12248465069285d37388a62bfd6..09eaa747306159c2f0da4f21677cdbc947aa6d53 100644 (file)
@@ -1024,6 +1024,12 @@ extern kern_return_t vm_map_get_upl(
                                int             *flags,
                                int             force_data_sync);
 
+#if CONFIG_DYNAMIC_CODE_SIGNING
+extern kern_return_t vm_map_sign(vm_map_t map, 
+                                vm_map_offset_t start, 
+                                vm_map_offset_t end);
+#endif
+
 __END_DECLS
 
 #endif /* KERNEL_PRIVATE */
index 5ad70b32332065d0c911c3df05c6d64c9912e999..e8a1605a75957a0fdb12d28d7f0c595d7f2894e6 100644 (file)
@@ -663,14 +663,15 @@ copyio(int copy_type, user_addr_t user_addr, char *kernel_addr,
 
         pmap = thread->map->pmap;
 
+
+       assert((vm_offset_t)kernel_addr >= VM_MIN_KERNEL_AND_KEXT_ADDRESS ||
+              copy_type == COPYINPHYS || copy_type == COPYOUTPHYS);
+
        /* Sanity and security check for addresses to/from a user */
-       if ((copy_type == COPYIN ||
-            copy_type == COPYINSTR ||
-            copy_type == COPYOUT) &&
-           (pmap != kernel_pmap) &&
-           ((vm_offset_t)kernel_addr < VM_MIN_KERNEL_AND_KEXT_ADDRESS ||
-            !IS_USERADDR64_CANONICAL(user_addr))) {
-               error = EACCES;
+
+       if (((pmap != kernel_pmap) && (use_kernel_map == 0)) &&
+           ((nbytes && (user_addr+nbytes <= user_addr)) || ((user_addr + nbytes) > vm_map_max(thread->map)))) {
+               error = EFAULT;
                goto out;
        }
 
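Editor's note: the rewritten check swaps a kernel-address sanity test returning EACCES for a user-range validity test returning EFAULT, applied only when copying against a user map; the kernel-address condition becomes the assert above it. The key idiom is the unsigned wraparound test: for nbytes > 0, user_addr + nbytes <= user_addr holds exactly when the addition overflowed. In isolation:

    #include <stdbool.h>
    #include <stdint.h>

    /* mirror of the new copyio() range check */
    static bool
    user_range_invalid(uint64_t user_addr, uint64_t nbytes, uint64_t map_max)
    {
        return (nbytes && (user_addr + nbytes <= user_addr)) ||
               ((user_addr + nbytes) > map_max);
    }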
index 13c439a96e85d25404bc5f27619658b2a879a15e..e53843224c62381ee851207931add9ea17adb1f6 100644 (file)
@@ -90,7 +90,6 @@
  */
 
 #include <string.h>
-#include <norma_vm.h>
 #include <mach_kdb.h>
 #include <mach_ldebug.h>
 
 #include <i386/mp_desc.h>
 
 
-/* #define DEBUGINTERRUPTS 1  uncomment to ensure pmap callers have interrupts enabled */
-#ifdef DEBUGINTERRUPTS
-#define pmap_intr_assert() {                                                   \
-       if (processor_avail_count > 1 && !ml_get_interrupts_enabled())          \
-               panic("pmap interrupt assert %s, %d",__FILE__, __LINE__);       \
-}
-#else
-#define pmap_intr_assert()
-#endif
 
 #ifdef IWANTTODEBUG
 #undef DEBUG
@@ -178,11 +168,6 @@ boolean_t  no_shared_cr3 = DEBUG;          /* TRUE for DEBUG by default */
  * Forward declarations for internal functions.
  */
 
-void           pmap_remove_range(
-                       pmap_t          pmap,
-                       vm_map_offset_t va,
-                       pt_entry_t      *spte,
-                       pt_entry_t      *epte);
 
 void           phys_attribute_clear(
                        ppnum_t         phys,
@@ -209,166 +194,12 @@ int allow_stack_exec = 0;                /* No apps may execute from the stack by default */
 
 const boolean_t cpu_64bit  = TRUE; /* Mais oui! */
 
-/*
- * when spinning through pmap_remove
- * ensure that we don't spend too much
- * time with preemption disabled.
- * I'm setting the current threshold
- * to 20us
- */
-#define MAX_PREEMPTION_LATENCY_NS 20000
-
 uint64_t max_preemption_latency_tsc = 0;
 
-
-/*
- *     Private data structures.
- */
-
-/*
- *     For each vm_page_t, there is a list of all currently
- *     valid virtual mappings of that page.  An entry is
- *     a pv_rooted_entry_t; the list is the pv_table.
- *
- *      N.B.  with the new combo rooted/hashed scheme it is
- *      only possibly to remove individual non-rooted entries
- *      if they are found via the hashed chains as there is no
- *      way to unlink the singly linked hashed entries if navigated to
- *      via the queue list off the rooted entries.  Think of it as
- *      hash/walk/pull, keeping track of the prev pointer while walking
- *      the singly linked hash list.  All of this is to save memory and
- *      keep both types of pv_entries as small as possible.
- */
-
-/*
-
-PV HASHING Changes - JK 1/2007
-
-Pve's establish physical to virtual mappings.  These are used for aliasing of a 
-physical page to (potentially many) virtual addresses within pmaps. In the
-previous implementation the structure of the pv_entries (each 16 bytes in size) was
-
-typedef struct pv_entry {
-    struct pv_entry_t    next;
-    pmap_t                    pmap;
-    vm_map_offset_t   va;
-} *pv_entry_t;
-
-An initial array of these is created at boot time, one per physical page of
-memory, indexed by the physical page number. Additionally, a pool of entries
-is created from a pv_zone to be used as needed by pmap_enter() when it is
-creating new mappings.  Originally, we kept this pool around because the code
-in pmap_enter() was unable to block if it needed an entry and none were
-available - we'd panic.  Some time ago I restructured the pmap_enter() code
-so that for user pmaps it can block while zalloc'ing a pv structure and restart,
-removing a panic from the code (in the case of the kernel pmap we cannot block
-and still panic, so, we keep a separate hot pool for use only on kernel pmaps).
-The pool has not been removed since there is a large performance gain keeping
-freed pv's around for reuse and not suffering the overhead of zalloc for every
-new pv we need.
-
-As pmap_enter() created new mappings it linked the new pve's for them off the
-fixed pv array for that ppn (off the next pointer).  These pve's are accessed
-for several operations, one of them being address space teardown. In that case,
-we basically do this
-
-       for (every page/pte in the space) {
-               calc pve_ptr from the ppn in the pte
-               for (every pv in the list for the ppn) {
-                       if (this pv is for this pmap/vaddr) {
-                               do housekeeping
-                               unlink/free the pv
-                       }
-               }
-       }
-
-The problem arose when we were running, say 8000 (or even 2000) apache or
-other processes and one or all terminate. The list hanging off each pv array
-entry could have thousands of entries.  We were continuously linearly searching
-each of these lists as we stepped through the address space we were tearing
-down.  Because of the locks we hold, likely taking a cache miss for each node,
-and interrupt disabling for MP issues the system became completely unresponsive
-for many seconds while we did this.
-
-Realizing that pve's are accessed in two distinct ways (linearly running the
-list by ppn for operations like pmap_page_protect and finding and
-modifying/removing a single pve as part of pmap_enter processing) has led to
-modifying the pve structures and databases.
-
-There are now two types of pve structures.  A "rooted" structure which is
-basically the original structure accessed in an array by ppn, and a ''hashed''
-structure accessed on a hash list via a hash of [pmap, vaddr]. These have been
-designed with the two goals of minimizing wired memory and making the lookup of
-a ppn faster.  Since a vast majority of pages in the system are not aliased
-and hence represented by a single pv entry I've kept the rooted entry size as
-small as possible because there is one of these dedicated for every physical
-page of memory.  The hashed pve's are larger due to the addition of the hash
-link and the ppn entry needed for matching while running the hash list to find
-the entry we are looking for.  This way, only systems that have lots of
-aliasing (like 2000+ httpd procs) will pay the extra memory price. Both
-structures have the same first three fields allowing some simplification in
-the code.
-
-They have these shapes
-
-typedef struct pv_rooted_entry {
-       queue_head_t            qlink;
-        vm_map_offset_t                va;
-       pmap_t                  pmap;
-} *pv_rooted_entry_t;
-
-
-typedef struct pv_hashed_entry {
-       queue_head_t            qlink;
-       vm_map_offset_t         va;
-       pmap_t                  pmap;
-       ppnum_t                 ppn;
-       struct pv_hashed_entry *nexth;
-} *pv_hashed_entry_t;
-
-The main flow difference is that the code is now aware of the rooted entry and
-the hashed entries.  Code that runs the pv list still starts with the rooted
-entry and then continues down the qlink onto the hashed entries.  Code that is
-looking up a specific pv entry first checks the rooted entry and then hashes
-and runs the hash list for the match. The hash list lengths are much smaller
-than the original pv lists that contained all aliases for the specific ppn.
-
-*/
-
-typedef struct pv_rooted_entry {
-       /* first three entries must match pv_hashed_entry_t */
-        queue_head_t           qlink;
-       vm_map_offset_t         va;     /* virtual address for mapping */
-       pmap_t                  pmap;   /* pmap where mapping lies */
-} *pv_rooted_entry_t;
-
-#define PV_ROOTED_ENTRY_NULL   ((pv_rooted_entry_t) 0)
-
-pv_rooted_entry_t      pv_head_table;          /* array of entries, one per page */
-
-typedef struct pv_hashed_entry {
-       /* first three entries must match pv_rooted_entry_t */
-       queue_head_t            qlink;
-       vm_map_offset_t         va;
-       pmap_t                  pmap;
-       ppnum_t                 ppn;
-       struct pv_hashed_entry  *nexth;
-} *pv_hashed_entry_t;
-
-#define PV_HASHED_ENTRY_NULL ((pv_hashed_entry_t)0)
-
-#define NPVHASH 4095   /* MUST BE 2^N - 1 */
 pv_hashed_entry_t     *pv_hash_table;  /* hash lists */
 
 uint32_t npvhash = 0;
 
-//#define PV_DEBUG 1   /* uncomment to enable some PV debugging code */
-#ifdef PV_DEBUG
-#define CHK_NPVHASH() if(0 == npvhash) panic("npvhash uninitialized");
-#else
-#define CHK_NPVHASH(x)
-#endif
-
 pv_hashed_entry_t      pv_hashed_free_list = PV_HASHED_ENTRY_NULL;
 pv_hashed_entry_t      pv_hashed_kern_free_list = PV_HASHED_ENTRY_NULL;
 decl_simple_lock_data(,pv_hashed_free_list_lock)
@@ -377,53 +208,7 @@ decl_simple_lock_data(,pv_hash_table_lock)
 
 int                    pv_hashed_free_count = 0;
 int                    pv_hashed_kern_free_count = 0;
-#define PV_HASHED_LOW_WATER_MARK 5000
-#define PV_HASHED_KERN_LOW_WATER_MARK 100
-#define PV_HASHED_ALLOC_CHUNK 2000
-#define PV_HASHED_KERN_ALLOC_CHUNK 50
-thread_call_t          mapping_adjust_call;
-static thread_call_data_t mapping_adjust_call_data;
-uint32_t               mappingrecurse = 0;
-
-#define        PV_HASHED_ALLOC(pvh_e) {                                        \
-       simple_lock(&pv_hashed_free_list_lock);                         \
-       if ((pvh_e = pv_hashed_free_list) != 0) {                       \
-         pv_hashed_free_list = (pv_hashed_entry_t)pvh_e->qlink.next;   \
-          pv_hashed_free_count--;                                      \
-          if (pv_hashed_free_count < PV_HASHED_LOW_WATER_MARK)         \
-            if (hw_compare_and_store(0,1,(u_int *)&mappingrecurse))    \
-              thread_call_enter(mapping_adjust_call);                  \
-       }                                                               \
-       simple_unlock(&pv_hashed_free_list_lock);                       \
-}
-
-#define        PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt) {                   \
-       simple_lock(&pv_hashed_free_list_lock);                         \
-       pvh_et->qlink.next = (queue_entry_t)pv_hashed_free_list;        \
-       pv_hashed_free_list = pvh_eh;                                   \
-        pv_hashed_free_count += pv_cnt;                                        \
-       simple_unlock(&pv_hashed_free_list_lock);                       \
-}
-
-#define        PV_HASHED_KERN_ALLOC(pvh_e) {                                   \
-       simple_lock(&pv_hashed_kern_free_list_lock);                    \
-       if ((pvh_e = pv_hashed_kern_free_list) != 0) {                  \
-         pv_hashed_kern_free_list = (pv_hashed_entry_t)pvh_e->qlink.next; \
-          pv_hashed_kern_free_count--;                                 \
-          if (pv_hashed_kern_free_count < PV_HASHED_KERN_LOW_WATER_MARK)\
-            if (hw_compare_and_store(0,1,(u_int *)&mappingrecurse))    \
-              thread_call_enter(mapping_adjust_call);                  \
-       }                                                               \
-       simple_unlock(&pv_hashed_kern_free_list_lock);                  \
-}
 
-#define        PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt) {              \
-       simple_lock(&pv_hashed_kern_free_list_lock);                    \
-       pvh_et->qlink.next = (queue_entry_t)pv_hashed_kern_free_list;   \
-       pv_hashed_kern_free_list = pvh_eh;                              \
-        pv_hashed_kern_free_count += pv_cnt;                           \
-       simple_unlock(&pv_hashed_kern_free_list_lock);                  \
-}
 
 zone_t         pv_hashed_list_zone;    /* zone of pv_hashed_entry structures */
 
@@ -436,10 +221,10 @@ static zone_t pdpt_zone;
  */
 
 char   *pv_lock_table;         /* pointer to array of bits */
-#define pv_lock_table_size(n)  (((n)+BYTE_SIZE-1)/BYTE_SIZE)
+
 
 char    *pv_hash_lock_table;
-#define pv_hash_lock_table_size(n)  (((n)+BYTE_SIZE-1)/BYTE_SIZE)
+
 
 /*
  *     First and last physical addresses that we maintain any information
@@ -453,97 +238,13 @@ static struct vm_object kpml4obj_object_store;
 static struct vm_object kpdptobj_object_store;
 
 /*
- *     Index into pv_head table, its lock bits, and the modify/reference and managed bits
- */
-
-#define pa_index(pa)           (i386_btop(pa))
-#define ppn_to_pai(ppn)                ((int)ppn)
-
-#define pai_to_pvh(pai)                (&pv_head_table[pai])
-#define lock_pvh_pai(pai)      bit_lock(pai, (void *)pv_lock_table)
-#define unlock_pvh_pai(pai)    bit_unlock(pai, (void *)pv_lock_table)
-
-static inline uint32_t
-pvhashidx(pmap_t pmap, vm_offset_t va)
-{
-       return ((uint32_t)(uint64_t)pmap ^
-               ((uint32_t)((uint64_t)va >> PAGE_SHIFT) & 0xFFFFFFFF)) &
-              npvhash;
-}
-#define pvhash(idx)            (&pv_hash_table[idx])
-
-#define lock_hash_hash(hash)   bit_lock(hash, (void *)pv_hash_lock_table)
-#define unlock_hash_hash(hash) bit_unlock(hash, (void *)pv_hash_lock_table)
-
-/*
- *     Array of physical page attribites for managed pages.
+ *     Array of physical page attributes for managed pages.
  *     One byte per physical page.
  */
 char           *pmap_phys_attributes;
 unsigned int   last_managed_page = 0;
-#define IS_MANAGED_PAGE(x)                             \
-       ((unsigned int)(x) <= last_managed_page &&      \
-        (pmap_phys_attributes[x] & PHYS_MANAGED))
-
-/*
- *     Physical page attributes.  Copy bits from PTE definition.
- */
-#define        PHYS_MODIFIED   INTEL_PTE_MOD   /* page modified */
-#define        PHYS_REFERENCED INTEL_PTE_REF   /* page referenced */
-#define PHYS_MANAGED   INTEL_PTE_VALID /* page is managed */
-
-/*
- *     Amount of virtual memory mapped by one
- *     page-directory entry.
- */
-#define        PDE_MAPPED_SIZE         (pdetova(1))
 uint64_t pde_mapped_size = PDE_MAPPED_SIZE;
 
-/*
- *     Locking and TLB invalidation
- */
-
-/*
- *     Locking Protocols: (changed 2/2007 JK)
- *
- *     There are two structures in the pmap module that need locking:
- *     the pmaps themselves, and the per-page pv_lists (which are locked
- *     by locking the pv_lock_table entry that corresponds to the pv_head
- *     for the list in question.)  Most routines want to lock a pmap and
- *     then do operations in it that require pv_list locking -- however
- *     pmap_remove_all and pmap_copy_on_write operate on a physical page
- *     basis and want to do the locking in the reverse order, i.e. lock
- *     a pv_list and then go through all the pmaps referenced by that list.
- *
- *      The system wide pmap lock has been removed. Now, paths take a lock
- *      on the pmap before changing its 'shape' and the reverse order lockers
- *      (coming in by phys ppn) take a lock on the corresponding pv and then
- *      retest to be sure nothing changed during the window before they locked
- *      and can then run up/down the pv lists holding the list lock. This also
- *      lets the pmap layer run (nearly completely) interrupt enabled, unlike
- *      previously.
- */
-
-/*
- * PV locking
- */
-
-#define LOCK_PVH(index)        {               \
-       mp_disable_preemption();        \
-       lock_pvh_pai(index);            \
-}
-
-#define UNLOCK_PVH(index) {            \
-       unlock_pvh_pai(index);          \
-       mp_enable_preemption();         \
-}
-/*
- * PV hash locking
- */
-
-#define LOCK_PV_HASH(hash)         lock_hash_hash(hash)
-#define UNLOCK_PV_HASH(hash)       unlock_hash_hash(hash)
-
 unsigned pmap_memory_region_count;
 unsigned pmap_memory_region_current;
 
@@ -562,8 +263,6 @@ pd_entry_t  commpage64_pde;
 
 struct zone    *pmap_zone;             /* zone of pmap structures */
 
-int            pmap_debug = 0;         /* flag for debugging prints */
-
 unsigned int   inuse_ptepages_count = 0;
 
 addr64_t       kernel64_cr3;
@@ -585,170 +284,6 @@ static int        nkpt;
 pt_entry_t     *DMAP1, *DMAP2;
 caddr_t         DADDR1;
 caddr_t         DADDR2;
-
-/*
- * unlinks the pv_hashed_entry_t pvh from the singly linked hash chain.
- * properly deals with the anchor.
- * must be called with the hash locked, does not unlock it
- */
-
-static inline void 
-pmap_pvh_unlink(pv_hashed_entry_t pvh)
-{
-       pv_hashed_entry_t       curh;
-       pv_hashed_entry_t       *pprevh;
-       int                     pvhash_idx;
-
-       CHK_NPVHASH();
-       pvhash_idx = pvhashidx(pvh->pmap, pvh->va);
-
-       pprevh = pvhash(pvhash_idx);
-
-#if PV_DEBUG
-       if (NULL == *pprevh)
-               panic("pvh_unlink null anchor"); /* JK DEBUG */
-#endif
-       curh = *pprevh;
-
-       while (PV_HASHED_ENTRY_NULL != curh) {
-               if (pvh == curh)
-                       break;
-               pprevh = &curh->nexth;
-               curh = curh->nexth;
-       }
-       if (PV_HASHED_ENTRY_NULL == curh) panic("pmap_pvh_unlink no pvh");
-       *pprevh = pvh->nexth;
-       return;
-}
-
-static inline void
-pv_hash_add(pv_hashed_entry_t  pvh_e,
-           pv_rooted_entry_t   pv_h)
-{
-       pv_hashed_entry_t       *hashp;
-       int                     pvhash_idx;
-
-       CHK_NPVHASH();
-       pvhash_idx = pvhashidx(pvh_e->pmap, pvh_e->va);
-       LOCK_PV_HASH(pvhash_idx);
-       insque(&pvh_e->qlink, &pv_h->qlink);
-       hashp = pvhash(pvhash_idx);
-#if PV_DEBUG
-       if (NULL==hashp)
-               panic("pv_hash_add(%p) null hash bucket", pvh_e);
-#endif
-       pvh_e->nexth = *hashp;
-       *hashp = pvh_e;
-       UNLOCK_PV_HASH(pvhash_idx);
-}
-
-static inline void
-pv_hash_remove(pv_hashed_entry_t pvh_e)
-{
-       int                     pvhash_idx;
-
-       CHK_NPVHASH();
-       pvhash_idx = pvhashidx(pvh_e->pmap,pvh_e->va);
-       LOCK_PV_HASH(pvhash_idx);
-       remque(&pvh_e->qlink);
-       pmap_pvh_unlink(pvh_e);
-       UNLOCK_PV_HASH(pvhash_idx);
-} 
-
-/*
- * Remove pv list entry.
- * Called with pv_head_table entry locked.
- * Returns pv entry to be freed (or NULL).
- */
-static inline pv_hashed_entry_t
-pmap_pv_remove(pmap_t          pmap,
-              vm_map_offset_t  vaddr,
-              ppnum_t          ppn)
-{
-       pv_hashed_entry_t       pvh_e;
-       pv_rooted_entry_t       pv_h;
-       pv_hashed_entry_t       *pprevh;
-       int                     pvhash_idx;
-       uint32_t                pv_cnt;
-
-       pvh_e = PV_HASHED_ENTRY_NULL;
-       pv_h = pai_to_pvh(ppn_to_pai(ppn));
-       if (pv_h->pmap == PMAP_NULL)
-               panic("pmap_pv_remove(%p,%llu,%u): null pv_list!",
-                     pmap, vaddr, ppn);
-
-       if (pv_h->va == vaddr && pv_h->pmap == pmap) {
-               /*
-                * Header is the pv_rooted_entry.
-                * We can't free that. If there is a queued
-                * entry after this one we remove that
-                * from the ppn queue, we remove it from the hash chain
-                * and copy it to the rooted entry. Then free it instead.
-                */
-               pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);
-               if (pv_h != (pv_rooted_entry_t) pvh_e) {
-                       /*
-                        * Entry queued to root, remove this from hash
-                        * and install as nem root.
-                        */
-                       CHK_NPVHASH();
-                       pvhash_idx = pvhashidx(pvh_e->pmap, pvh_e->va);
-                       LOCK_PV_HASH(pvhash_idx);
-                       remque(&pvh_e->qlink);
-                       pprevh = pvhash(pvhash_idx);
-                       if (PV_HASHED_ENTRY_NULL == *pprevh) {
-                               panic("pmap_pv_remove(%p,%llu,%u): "
-                                     "empty hash, removing rooted",
-                                     pmap, vaddr, ppn);
-                       }
-                       pmap_pvh_unlink(pvh_e);
-                       UNLOCK_PV_HASH(pvhash_idx);
-                       pv_h->pmap = pvh_e->pmap;
-                       pv_h->va = pvh_e->va;   /* dispose of pvh_e */
-               } else {
-                       /* none queued after rooted */
-                       pv_h->pmap = PMAP_NULL;
-                       pvh_e = PV_HASHED_ENTRY_NULL;
-               }
-       } else {
-               /*
-                * not removing rooted pv. find it on hash chain, remove from
-                * ppn queue and hash chain and free it
-                */
-               CHK_NPVHASH();
-               pvhash_idx = pvhashidx(pmap, vaddr);
-               LOCK_PV_HASH(pvhash_idx);
-               pprevh = pvhash(pvhash_idx);
-               if (PV_HASHED_ENTRY_NULL == *pprevh) {
-                       panic("pmap_pv_remove(%p,%llu,%u): empty hash",
-                             pmap, vaddr, ppn);
-               }
-               pvh_e = *pprevh;
-               pmap_pv_hashlist_walks++;
-               pv_cnt = 0;
-               while (PV_HASHED_ENTRY_NULL != pvh_e) {
-                       pv_cnt++;
-                       if (pvh_e->pmap == pmap &&
-                           pvh_e->va == vaddr &&
-                           pvh_e->ppn == ppn)
-                               break;
-                       pprevh = &pvh_e->nexth;
-                       pvh_e = pvh_e->nexth;
-               }
-               if (PV_HASHED_ENTRY_NULL == pvh_e)
-                       panic("pmap_pv_remove(%p,%llu,%u): pv not on hash",
-                        pmap, vaddr, ppn);
-               pmap_pv_hashlist_cnts += pv_cnt;
-               if (pmap_pv_hashlist_max < pv_cnt)
-                       pmap_pv_hashlist_max = pv_cnt;
-               *pprevh = pvh_e->nexth;
-               remque(&pvh_e->qlink);
-               UNLOCK_PV_HASH(pvhash_idx);
-       }
-
-       return pvh_e;
-}
-
 /*
  * for legacy, returns the address of the pde entry.
  * for 64 bit, causes the pdpt page containing the pde entry to be mapped,
@@ -1463,147 +998,6 @@ pmap_reference(pmap_t    p)
        }
 }
 
-/*
- *     Remove a range of hardware page-table entries.
- *     The entries given are the first (inclusive)
- *     and last (exclusive) entries for the VM pages.
- *     The virtual address is the va for the first pte.
- *
- *     The pmap must be locked.
- *     If the pmap is not the kernel pmap, the range must lie
- *     entirely within one pte-page.  This is NOT checked.
- *     Assumes that the pte-page exists.
- */
-
-void
-pmap_remove_range(
-       pmap_t                  pmap,
-       vm_map_offset_t         start_vaddr,
-       pt_entry_t              *spte,
-       pt_entry_t              *epte)
-{
-       pt_entry_t              *cpte;
-       pv_hashed_entry_t       pvh_et = PV_HASHED_ENTRY_NULL;
-       pv_hashed_entry_t       pvh_eh = PV_HASHED_ENTRY_NULL;
-       pv_hashed_entry_t       pvh_e;
-       int                     pvh_cnt = 0;
-       int                     num_removed, num_unwired, num_found;
-       int                     pai;
-       pmap_paddr_t            pa;
-       vm_map_offset_t         vaddr;
-
-       num_removed = 0;
-       num_unwired = 0;
-       num_found   = 0;
-
-       /* invalidate the PTEs first to "freeze" them */
-       for (cpte = spte, vaddr = start_vaddr;
-            cpte < epte;
-            cpte++, vaddr += PAGE_SIZE_64) {
-
-               pa = pte_to_pa(*cpte);
-               if (pa == 0)
-                       continue;
-               num_found++;
-
-               if (iswired(*cpte))
-                       num_unwired++;
-
-               pai = pa_index(pa);
-
-               if (!IS_MANAGED_PAGE(pai)) {
-                       /*
-                        *      Outside range of managed physical memory.
-                        *      Just remove the mappings.
-                        */
-                       pmap_store_pte(cpte, 0);
-                       continue;
-               }
-
-               /* invalidate the PTE */ 
-               pmap_update_pte(cpte, *cpte, (*cpte & ~INTEL_PTE_VALID));
-       }
-
-       if (num_found == 0) {
-               /* nothing was changed: we're done */
-               goto update_counts;
-       }
-
-       /* propagate the invalidates to other CPUs */
-
-       PMAP_UPDATE_TLBS(pmap, start_vaddr, vaddr);
-
-       for (cpte = spte, vaddr = start_vaddr;
-            cpte < epte;
-            cpte++, vaddr += PAGE_SIZE_64) {
-
-               pa = pte_to_pa(*cpte);
-               if (pa == 0)
-                       continue;
-
-               pai = pa_index(pa);
-
-               LOCK_PVH(pai);
-
-               pa = pte_to_pa(*cpte);
-               if (pa == 0) {
-                       UNLOCK_PVH(pai);
-                       continue;
-               }
-               num_removed++;
-
-               /*
-                * Get the modify and reference bits, then
-                * nuke the entry in the page table
-                */
-               /* remember reference and change */
-               pmap_phys_attributes[pai] |=
-                       (char) (*cpte & (PHYS_MODIFIED | PHYS_REFERENCED));
-               /* completely invalidate the PTE */
-               pmap_store_pte(cpte, 0);
-
-               /*
-                * Remove the mapping from the pvlist for this physical page.
-                */
-               pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t) pai);
-
-               UNLOCK_PVH(pai);
-
-               if (pvh_e != PV_HASHED_ENTRY_NULL) {
-                       pvh_e->qlink.next = (queue_entry_t) pvh_eh;
-                       pvh_eh = pvh_e;
-
-                       if (pvh_et == PV_HASHED_ENTRY_NULL) {
-                               pvh_et = pvh_e;
-                       }
-                       pvh_cnt++;
-               }
-       } /* for loop */
-
-       if (pvh_eh != PV_HASHED_ENTRY_NULL) {
-               PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
-       }
-update_counts:
-       /*
-        *      Update the counts
-        */
-#if TESTING
-       if (pmap->stats.resident_count < num_removed)
-               panic("pmap_remove_range: resident_count");
-#endif
-       assert(pmap->stats.resident_count >= num_removed);
-       OSAddAtomic(-num_removed,  &pmap->stats.resident_count);
-
-#if TESTING
-       if (pmap->stats.wired_count < num_unwired)
-               panic("pmap_remove_range: wired_count");
-#endif
-       assert(pmap->stats.wired_count >= num_unwired);
-       OSAddAtomic(-num_unwired,  &pmap->stats.wired_count);
-
-       return;
-}
-
 /*
  *     Remove phys addr if mapped in specified map
  *
@@ -1618,274 +1012,6 @@ pmap_remove_some_phys(
 
 }
 
-/*
- *     Remove the given range of addresses
- *     from the specified map.
- *
- *     It is assumed that the start and end are properly
- *     rounded to the hardware page size.
- */
-void
-pmap_remove(
-       pmap_t          map,
-       addr64_t        s64,
-       addr64_t        e64)
-{
-       pt_entry_t     *pde;
-       pt_entry_t     *spte, *epte;
-       addr64_t        l64;
-       uint64_t        deadline;
-
-       pmap_intr_assert();
-
-       if (map == PMAP_NULL || s64 == e64)
-               return;
-
-       PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
-                  map,
-                  (uint32_t) (s64 >> 32), s64,
-                  (uint32_t) (e64 >> 32), e64);
-
-
-       PMAP_LOCK(map);
-
-#if 0
-       /*
-        * Check that address range in the kernel does not overlap the stacks.
-        * We initialize local static min/max variables once to avoid making
-        * 2 function calls for every remove. Note also that these functions
-        * both return 0 before kernel stacks have been initialized, and hence
-        * the panic is not triggered in this case.
-        */
-       if (map == kernel_pmap) {
-               static vm_offset_t kernel_stack_min = 0;
-               static vm_offset_t kernel_stack_max = 0;
-
-               if (kernel_stack_min == 0) {
-                       kernel_stack_min = min_valid_stack_address();
-                       kernel_stack_max = max_valid_stack_address();
-               }
-               if ((kernel_stack_min <= s64 && s64 < kernel_stack_max) ||
-                   (kernel_stack_min < e64 && e64 <= kernel_stack_max))
-                       panic("pmap_remove() attempted in kernel stack");
-       }
-#else
-
-       /*
-        * The values of kernel_stack_min and kernel_stack_max are no longer
-        * relevant now that we allocate kernel stacks in the kernel map,
-        * so the old code above no longer applies.  If we wanted to check that
-        * we weren't removing a mapping of a page in a kernel stack we'd 
-        * mark the PTE with an unused bit and check that here.
-        */
-
-#endif
-
-       deadline = rdtsc64() + max_preemption_latency_tsc;
-
-       while (s64 < e64) {
-               l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
-               if (l64 > e64)
-                       l64 = e64;
-               pde = pmap_pde(map, s64);
-
-               if (pde && (*pde & INTEL_PTE_VALID)) {
-                       if (*pde & INTEL_PTE_PS) {
-                               /*
-                                * If we're removing a superpage, pmap_remove_range()
-                                * must work on level 2 instead of level 1; and we're
-                                * only passing a single level 2 entry instead of a
-                                * level 1 range.
-                                */
-                               spte = pde;
-                               epte = spte+1; /* excluded */
-                       } else {
-                               spte = pmap_pte(map, (s64 & ~(pde_mapped_size - 1)));
-                               spte = &spte[ptenum(s64)];
-                               epte = &spte[intel_btop(l64 - s64)];
-                       }
-                       pmap_remove_range(map, s64, spte, epte);
-               }
-               s64 = l64;
-               pde++;
-
-               if (s64 < e64 && rdtsc64() >= deadline) {
-                       PMAP_UNLOCK(map)
-                       PMAP_LOCK(map)
-                       deadline = rdtsc64() + max_preemption_latency_tsc;
-               }
-       }
-
-       PMAP_UNLOCK(map);
-
-       PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END,
-                  map, 0, 0, 0, 0);
-
-}
-
-/*
- *     Routine:        pmap_page_protect
- *
- *     Function:
- *             Lower the permission for all mappings to a given
- *             page.
- */
-void
-pmap_page_protect(
-        ppnum_t         pn,
-       vm_prot_t       prot)
-{
-       pv_hashed_entry_t       pvh_eh = PV_HASHED_ENTRY_NULL;
-       pv_hashed_entry_t       pvh_et = PV_HASHED_ENTRY_NULL;
-       pv_hashed_entry_t       nexth;
-       int                     pvh_cnt = 0;
-       pv_rooted_entry_t       pv_h;
-       pv_rooted_entry_t       pv_e;
-       pv_hashed_entry_t       pvh_e;
-       pt_entry_t              *pte;
-       int                     pai;
-       pmap_t                  pmap;
-       boolean_t               remove;
-
-       pmap_intr_assert();
-       assert(pn != vm_page_fictitious_addr);
-       if (pn == vm_page_guard_addr)
-               return;
-
-       pai = ppn_to_pai(pn);
-
-       if (!IS_MANAGED_PAGE(pai)) {
-               /*
-                *      Not a managed page.
-                */
-               return;
-       }
-       PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START,
-                  pn, prot, 0, 0, 0);
-
-       /*
-        * Determine the new protection.
-        */
-       switch (prot) {
-       case VM_PROT_READ:
-       case VM_PROT_READ | VM_PROT_EXECUTE:
-               remove = FALSE;
-               break;
-       case VM_PROT_ALL:
-               return;         /* nothing to do */
-       default:
-               remove = TRUE;
-               break;
-       }
-
-       pv_h = pai_to_pvh(pai);
-
-       LOCK_PVH(pai);
-
-
-       /*
-        * Walk down PV list, if any, changing or removing all mappings.
-        */
-       if (pv_h->pmap == PMAP_NULL)
-               goto done;
-
-       pv_e = pv_h;
-       pvh_e = (pv_hashed_entry_t) pv_e;       /* cheat */
-
-       do {
-               vm_map_offset_t vaddr;
-
-               pmap = pv_e->pmap;
-               vaddr = pv_e->va;
-               pte = pmap_pte(pmap, vaddr);
-               if (0 == pte) {
-                       panic("pmap_page_protect() "
-                               "pmap=%p pn=0x%x vaddr=0x%llx\n",
-                               pmap, pn, vaddr);
-               }
-               nexth = (pv_hashed_entry_t) queue_next(&pvh_e->qlink);
-
-               /*
-                * Remove the mapping if new protection is NONE
-                * or if write-protecting a kernel mapping.
-                */
-               if (remove || pmap == kernel_pmap) {
-                       /*
-                        * Remove the mapping, collecting dirty bits.
-                        */
-                       pmap_update_pte(pte, *pte, *pte & ~INTEL_PTE_VALID);
-                       PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
-                       pmap_phys_attributes[pai] |=
-                               *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
-                       pmap_store_pte(pte, 0);
-
-#if TESTING
-                       if (pmap->stats.resident_count < 1)
-                               panic("pmap_page_protect: resident_count");
-#endif
-                       assert(pmap->stats.resident_count >= 1);
-                       OSAddAtomic(-1,  &pmap->stats.resident_count);
-
-                       /*
-                        * Deal with the pv_rooted_entry.
-                        */
-
-                       if (pv_e == pv_h) {
-                               /*
-                                * Fix up head later.
-                                */
-                               pv_h->pmap = PMAP_NULL;
-                       } else {
-                               /*
-                                * Delete this entry.
-                                */
-                               pv_hash_remove(pvh_e);
-                               pvh_e->qlink.next = (queue_entry_t) pvh_eh;
-                               pvh_eh = pvh_e;
-
-                               if (pvh_et == PV_HASHED_ENTRY_NULL)
-                                       pvh_et = pvh_e;
-                               pvh_cnt++;
-                       }
-               } else {
-                       /*
-                        * Write-protect.
-                        */
-                       pmap_update_pte(pte, *pte, *pte & ~INTEL_PTE_WRITE);
-                       PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
-               }
-               pvh_e = nexth;
-       } while ((pv_e = (pv_rooted_entry_t) nexth) != pv_h);
-
-
-       /*
-         * If pv_head mapping was removed, fix it up.
-         */
-       if (pv_h->pmap == PMAP_NULL) {
-               pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);
-
-               if (pvh_e != (pv_hashed_entry_t) pv_h) {
-                       pv_hash_remove(pvh_e);
-                       pv_h->pmap = pvh_e->pmap;
-                       pv_h->va = pvh_e->va;
-                       pvh_e->qlink.next = (queue_entry_t) pvh_eh;
-                       pvh_eh = pvh_e;
-
-                       if (pvh_et == PV_HASHED_ENTRY_NULL)
-                               pvh_et = pvh_e;
-                       pvh_cnt++;
-               }
-       }
-       if (pvh_eh != PV_HASHED_ENTRY_NULL) {
-               PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
-       }
-done:
-       UNLOCK_PVH(pai);
-
-       PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END,
-                  0, 0, 0, 0, 0);
-}
-
 
 /*
  *     Routine:
@@ -2019,398 +1145,6 @@ pmap_map_block(
        }
 }
 
-
-/*
- *     Insert the given physical page (p) at
- *     the specified virtual address (v) in the
- *     target physical map with the protection requested.
- *
- *     If specified, the page will be wired down, meaning
- *     that the related pte cannot be reclaimed.
- *
- *     NB:  This is the only routine which MAY NOT lazy-evaluate
- *     or lose information.  That is, this routine must actually
- *     insert this page into the given map NOW.
- */
-void
-pmap_enter(
-       register pmap_t         pmap,
-       vm_map_offset_t         vaddr,
-       ppnum_t                 pn,
-       vm_prot_t               prot,
-       unsigned int            flags,
-       boolean_t               wired)
-{
-       pt_entry_t              *pte;
-       pv_rooted_entry_t       pv_h;
-       int                     pai;
-       pv_hashed_entry_t       pvh_e;
-       pv_hashed_entry_t       pvh_new;
-       pt_entry_t              template;
-       pmap_paddr_t            old_pa;
-       pmap_paddr_t            pa = (pmap_paddr_t) i386_ptob(pn);
-       boolean_t               need_tlbflush = FALSE;
-       boolean_t               set_NX;
-       char                    oattr;
-       boolean_t               old_pa_locked;
-       boolean_t               superpage = flags & VM_MEM_SUPERPAGE;
-       vm_object_t             delpage_pm_obj = NULL;
-       int                     delpage_pde_index = 0;
-
-
-       pmap_intr_assert();
-       assert(pn != vm_page_fictitious_addr);
-       if (pmap_debug)
-               kprintf("pmap_enter(%p,%llu,%u)\n", pmap, vaddr, pn);
-       if (pmap == PMAP_NULL)
-               return;
-       if (pn == vm_page_guard_addr)
-               return;
-
-       PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
-                  pmap,
-                  (uint32_t) (vaddr >> 32), (uint32_t) vaddr,
-                  pn, prot);
-
-       if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
-               set_NX = FALSE;
-       else
-               set_NX = TRUE;
-
-       /*
-        *      Must allocate a new pvlist entry while we're unlocked;
-        *      zalloc may cause pageout (which will lock the pmap system).
-        *      If we determine we need a pvlist entry, we will unlock
-        *      and allocate one.  Then we will retry, throughing away
-        *      the allocated entry later (if we no longer need it).
-        */
-
-       pvh_new = PV_HASHED_ENTRY_NULL;
-Retry:
-       pvh_e = PV_HASHED_ENTRY_NULL;
-
-       PMAP_LOCK(pmap);
-
-       /*
-        *      Expand pmap to include this pte.  Assume that
-        *      pmap is always expanded to include enough hardware
-        *      pages to map one VM page.
-        */
-        if(superpage) {
-               while ((pte = pmap64_pde(pmap, vaddr)) == PD_ENTRY_NULL) {
-                       /* need room for another pde entry */
-                       PMAP_UNLOCK(pmap);
-                       pmap_expand_pdpt(pmap, vaddr);
-                       PMAP_LOCK(pmap);
-               }
-       } else {
-               while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) {
-                       /*
-                        * Must unlock to expand the pmap
-                        * going to grow pde level page(s)
-                        */
-                       PMAP_UNLOCK(pmap);
-                       pmap_expand(pmap, vaddr);
-                       PMAP_LOCK(pmap);
-               }
-       }
-
-       if (superpage && *pte && !(*pte & INTEL_PTE_PS)) {
-               /*
-                * There is still an empty page table mapped that
-                * was used for a previous base page mapping.
-                * Remember the PDE and the PDE index, so that we
-                * can free the page at the end of this function.
-                */
-               delpage_pde_index = (int)pdeidx(pmap, vaddr);
-               delpage_pm_obj = pmap->pm_obj;
-               *pte = 0;
-       }
-
-       old_pa = pte_to_pa(*pte);
-       pai = pa_index(old_pa);
-       old_pa_locked = FALSE;
-
-       /*
-        * if we have a previous managed page, lock the pv entry now. after
-        * we lock it, check to see if someone beat us to the lock and if so
-        * drop the lock
-        */
-       if ((0 != old_pa) && IS_MANAGED_PAGE(pai)) {
-               LOCK_PVH(pai);
-               old_pa_locked = TRUE;
-               old_pa = pte_to_pa(*pte);
-               if (0 == old_pa) {
-                       UNLOCK_PVH(pai);        /* another path beat us to it */
-                       old_pa_locked = FALSE;
-               }
-       }
-
-       /*
-        *      Special case if the incoming physical page is already mapped
-        *      at this address.
-        */
-       if (old_pa == pa) {
-
-               /*
-                *      May be changing its wired attribute or protection
-                */
-
-               template = pa_to_pte(pa) | INTEL_PTE_VALID;
-
-               if (VM_MEM_NOT_CACHEABLE ==
-                   (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT))) {
-                       if (!(flags & VM_MEM_GUARDED))
-                               template |= INTEL_PTE_PTA;
-                       template |= INTEL_PTE_NCACHE;
-               }
-               if (pmap != kernel_pmap)
-                       template |= INTEL_PTE_USER;
-               if (prot & VM_PROT_WRITE)
-                       template |= INTEL_PTE_WRITE;
-
-               if (set_NX)
-                       template |= INTEL_PTE_NX;
-
-               if (wired) {
-                       template |= INTEL_PTE_WIRED;
-                       if (!iswired(*pte))
-                               OSAddAtomic(+1,
-                                       &pmap->stats.wired_count);
-               } else {
-                       if (iswired(*pte)) {
-                               assert(pmap->stats.wired_count >= 1);
-                               OSAddAtomic(-1,
-                                       &pmap->stats.wired_count);
-                       }
-               }
-               if (superpage)          /* this path can not be used */
-                       template |= INTEL_PTE_PS;       /* to change the page size! */
-
-               /* store modified PTE and preserve RC bits */
-               pmap_update_pte(pte, *pte,
-                       template | (*pte & (INTEL_PTE_REF | INTEL_PTE_MOD)));
-               if (old_pa_locked) {
-                       UNLOCK_PVH(pai);
-                       old_pa_locked = FALSE;
-               }
-               need_tlbflush = TRUE;
-               goto Done;
-       }
-
-       /*
-        *      Outline of code from here:
-        *         1) If va was mapped, update TLBs, remove the mapping
-        *            and remove old pvlist entry.
-        *         2) Add pvlist entry for new mapping
-        *         3) Enter new mapping.
-        *
-        *      If the old physical page is not managed step 1) is skipped
-        *      (except for updating the TLBs), and the mapping is
-        *      overwritten at step 3).  If the new physical page is not
-        *      managed, step 2) is skipped.
-        */
-
-       if (old_pa != (pmap_paddr_t) 0) {
-
-               /*
-                *      Don't do anything to pages outside valid memory here.
-                *      Instead convince the code that enters a new mapping
-                *      to overwrite the old one.
-                */
-
-               /* invalidate the PTE */
-               pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_VALID));
-               /* propagate invalidate everywhere */
-               PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
-               /* remember reference and change */
-               oattr = (char) (*pte & (PHYS_MODIFIED | PHYS_REFERENCED));
-               /* completely invalidate the PTE */
-               pmap_store_pte(pte, 0);
-
-               if (IS_MANAGED_PAGE(pai)) {
-#if TESTING
-                       if (pmap->stats.resident_count < 1)
-                               panic("pmap_enter: resident_count");
-#endif
-                       assert(pmap->stats.resident_count >= 1);
-                       OSAddAtomic(-1,
-                               &pmap->stats.resident_count);
-
-                       if (iswired(*pte)) {
-#if TESTING
-                               if (pmap->stats.wired_count < 1)
-                                       panic("pmap_enter: wired_count");
-#endif
-                               assert(pmap->stats.wired_count >= 1);
-                               OSAddAtomic(-1,
-                                       &pmap->stats.wired_count);
-                       }
-                       pmap_phys_attributes[pai] |= oattr;
-
-                       /*
-                        *      Remove the mapping from the pvlist for
-                        *      this physical page.
-                        *      We'll end up with either a rooted pv or a
-                        *      hashed pv
-                        */
-                       pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t) pai);
-
-               } else {
-
-                       /*
-                        *      old_pa is not managed.
-                        *      Do removal part of accounting.
-                        */
-
-                       if (iswired(*pte)) {
-                               assert(pmap->stats.wired_count >= 1);
-                               OSAddAtomic(-1,
-                                       &pmap->stats.wired_count);
-                       }
-               }
-       }
-
-       /*
-        * if we had a previously managed page locked, unlock it now
-        */
-       if (old_pa_locked) {
-               UNLOCK_PVH(pai);
-               old_pa_locked = FALSE;
-       }
-
-       pai = pa_index(pa);     /* now working with new incoming phys page */
-       if (IS_MANAGED_PAGE(pai)) {
-
-               /*
-                *      Step 2) Enter the mapping in the PV list for this
-                *      physical page.
-                */
-               pv_h = pai_to_pvh(pai);
-
-               LOCK_PVH(pai);
-
-               if (pv_h->pmap == PMAP_NULL) {
-                       /*
-                        *      No mappings yet, use rooted pv
-                        */
-                       pv_h->va = vaddr;
-                       pv_h->pmap = pmap;
-                       queue_init(&pv_h->qlink);
-               } else {
-                       /*
-                        *      Add new pv_hashed_entry after header.
-                        */
-                       if ((PV_HASHED_ENTRY_NULL == pvh_e) && pvh_new) {
-                               pvh_e = pvh_new;
-                               pvh_new = PV_HASHED_ENTRY_NULL;
-                       } else if (PV_HASHED_ENTRY_NULL == pvh_e) {
-                               PV_HASHED_ALLOC(pvh_e);
-                               if (PV_HASHED_ENTRY_NULL == pvh_e) {
-                                       /*
-                                        * the pv list is empty. if we are on
-                                        * the kernel pmap we'll use one of
-                                        * the special private kernel pv_e's,
-                                        * else, we need to unlock
-                                        * everything, zalloc a pv_e, and
-                                        * restart bringing in the pv_e with
-                                        * us.
-                                        */
-                                       if (kernel_pmap == pmap) {
-                                               PV_HASHED_KERN_ALLOC(pvh_e);
-                                       } else {
-                                               UNLOCK_PVH(pai);
-                                               PMAP_UNLOCK(pmap);
-                                               pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
-                                               goto Retry;
-                                       }
-                               }
-                       }
-                       if (PV_HASHED_ENTRY_NULL == pvh_e)
-                               panic("pvh_e exhaustion");
-
-                       pvh_e->va = vaddr;
-                       pvh_e->pmap = pmap;
-                       pvh_e->ppn = pn;
-                       pv_hash_add(pvh_e, pv_h);
-
-                       /*
-                        *      Remember that we used the pvlist entry.
-                        */
-                       pvh_e = PV_HASHED_ENTRY_NULL;
-               }
-
-               /*
-                * only count the mapping
-                * for 'managed memory'
-                */
-               OSAddAtomic(+1,  & pmap->stats.resident_count);
-               if (pmap->stats.resident_count > pmap->stats.resident_max) {
-                       pmap->stats.resident_max = pmap->stats.resident_count;
-               }
-       }
-       /*
-        * Step 3) Enter the mapping.
-        *
-        *      Build a template to speed up entering -
-        *      only the pfn changes.
-        */
-       template = pa_to_pte(pa) | INTEL_PTE_VALID;
-
-       if (flags & VM_MEM_NOT_CACHEABLE) {
-               if (!(flags & VM_MEM_GUARDED))
-                       template |= INTEL_PTE_PTA;
-               template |= INTEL_PTE_NCACHE;
-       }
-       if (pmap != kernel_pmap)
-               template |= INTEL_PTE_USER;
-       if (prot & VM_PROT_WRITE)
-               template |= INTEL_PTE_WRITE;
-       if (set_NX)
-               template |= INTEL_PTE_NX;
-       if (wired) {
-               template |= INTEL_PTE_WIRED;
-               OSAddAtomic(+1,  & pmap->stats.wired_count);
-       }
-       if (superpage)
-               template |= INTEL_PTE_PS;
-       pmap_store_pte(pte, template);
-
-       /*
-        * if this was a managed page we delayed unlocking the pv until here
-        * to prevent pmap_page_protect et al from finding it until the pte
-        * has been stored
-        */
-       if (IS_MANAGED_PAGE(pai)) {
-               UNLOCK_PVH(pai);
-       }
-Done:
-       if (need_tlbflush == TRUE)
-               PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
-
-       if (pvh_e != PV_HASHED_ENTRY_NULL) {
-               PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1);
-       }
-       if (pvh_new != PV_HASHED_ENTRY_NULL) {
-               PV_HASHED_KERN_FREE_LIST(pvh_new, pvh_new, 1);
-       }
-       PMAP_UNLOCK(pmap);
-
-       if (delpage_pm_obj) {
-               vm_page_t m;
-
-               vm_object_lock(delpage_pm_obj);
-               m = vm_page_lookup(delpage_pm_obj, delpage_pde_index);
-               if (m == VM_PAGE_NULL)
-                   panic("pmap_enter: pte page not in object");
-               VM_PAGE_FREE(m);
-               OSAddAtomic(-1,  &inuse_ptepages_count);
-               vm_object_unlock(delpage_pm_obj);
-       }
-
-       PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, 0, 0, 0, 0, 0);
-}
-
 /*
  *     Routine:        pmap_change_wiring
  *     Function:       Change the wiring attribute for a map/virtual-address
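
The removed pmap_enter() tail above follows the three-step outline in its own comment: (1) invalidate and tear down any old mapping, including its pv-list entry and the wired/resident accounting; (2) add a pv-list entry when the new physical page is managed, using the rooted entry if the page had no mappings yet and a hashed entry otherwise; (3) build and store the new PTE (presumably this logic is consolidated into the shared x86 pmap code in this release). As a reading aid, here is a minimal, self-contained C sketch of the template assembly that both the existing-mapping fast path and step 3 perform. Everything in it is an illustrative stand-in: build_pte_template(), struct pte_request, and the bit values are hypothetical, while the real INTEL_PTE_* masks live in osfmk/i386/pmap.h.

#include <stdint.h>
#include <stdbool.h>

typedef uint64_t pt_entry_t;
typedef uint64_t pmap_paddr_t;

/* Illustrative bit assignments only -- not the kernel's encodings. */
#define INTEL_PTE_VALID   (1ULL << 0)
#define INTEL_PTE_WRITE   (1ULL << 1)
#define INTEL_PTE_USER    (1ULL << 2)
#define INTEL_PTE_NCACHE  (1ULL << 4)
#define INTEL_PTE_PTA     (1ULL << 7)   /* PAT bit at PTE level */
#define INTEL_PTE_PS      (1ULL << 7)   /* page-size bit at PDE level (same bit) */
#define INTEL_PTE_WIRED   (1ULL << 10)  /* software-defined bit */
#define INTEL_PTE_NX      (1ULL << 63)

#define pa_to_pte(pa)     ((pa) & ~0xFFFULL)   /* keep the frame bits */

/* Hypothetical bundle of the pmap_enter() locals the template depends on. */
struct pte_request {
	pmap_paddr_t pa;
	bool user;       /* pmap != kernel_pmap */
	bool writable;   /* prot & VM_PROT_WRITE */
	bool no_exec;    /* set_NX */
	bool uncached;   /* flags & VM_MEM_NOT_CACHEABLE */
	bool guarded;    /* flags & VM_MEM_GUARDED */
	bool wired;
	bool superpage;
};

static pt_entry_t
build_pte_template(const struct pte_request *req)
{
	pt_entry_t template = pa_to_pte(req->pa) | INTEL_PTE_VALID;

	if (req->uncached) {
		if (!req->guarded)
			template |= INTEL_PTE_PTA;  /* write-combining via PAT */
		template |= INTEL_PTE_NCACHE;
	}
	if (req->user)
		template |= INTEL_PTE_USER;
	if (req->writable)
		template |= INTEL_PTE_WRITE;
	if (req->no_exec)
		template |= INTEL_PTE_NX;
	if (req->wired)
		template |= INTEL_PTE_WIRED;
	if (req->superpage)
		template |= INTEL_PTE_PS;
	return template;
}

Two details the sketch omits: the fast path only honors VM_MEM_NOT_CACHEABLE when VM_WIMG_USE_DEFAULT is clear, and it preserves the hardware reference/change bits by OR-ing (*pte & (INTEL_PTE_REF | INTEL_PTE_MOD)) back into the template before pmap_update_pte(), so accounting already gathered for the page is not lost.
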
@@ -3341,96 +2075,6 @@ phys_page_exists(ppnum_t pn)
        return TRUE;
 }
 
-void
-mapping_free_prime(void)
-{
-       int                     i;
-       pv_hashed_entry_t       pvh_e;
-       pv_hashed_entry_t       pvh_eh;
-       pv_hashed_entry_t       pvh_et;
-       int                     pv_cnt;
-
-       pv_cnt = 0;
-       pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
-       for (i = 0; i < (5 * PV_HASHED_ALLOC_CHUNK); i++) {
-               pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
-
-               pvh_e->qlink.next = (queue_entry_t)pvh_eh;
-               pvh_eh = pvh_e;
-
-               if (pvh_et == PV_HASHED_ENTRY_NULL)
-                       pvh_et = pvh_e;
-               pv_cnt++;
-       }
-       PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
-
-       pv_cnt = 0;
-       pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
-       for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK; i++) {
-               pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
-
-               pvh_e->qlink.next = (queue_entry_t)pvh_eh;
-               pvh_eh = pvh_e;
-
-               if (pvh_et == PV_HASHED_ENTRY_NULL)
-                       pvh_et = pvh_e;
-               pv_cnt++;
-       }
-       PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
-
-}
-
-void
-mapping_adjust(void)
-{
-       pv_hashed_entry_t       pvh_e;
-       pv_hashed_entry_t       pvh_eh;
-       pv_hashed_entry_t       pvh_et;
-       int                     pv_cnt;
-       int                     i;
-
-       if (mapping_adjust_call == NULL) {
-               thread_call_setup(&mapping_adjust_call_data,
-                                 (thread_call_func_t) mapping_adjust,
-                                 (thread_call_param_t) NULL);
-               mapping_adjust_call = &mapping_adjust_call_data;
-       }
-
-       pv_cnt = 0;
-       pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
-       if (pv_hashed_kern_free_count < PV_HASHED_KERN_LOW_WATER_MARK) {
-               for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK; i++) {
-                       pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
-
-                       pvh_e->qlink.next = (queue_entry_t)pvh_eh;
-                       pvh_eh = pvh_e;
-
-                       if (pvh_et == PV_HASHED_ENTRY_NULL)
-                               pvh_et = pvh_e;
-                       pv_cnt++;
-               }
-               PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
-       }
-
-       pv_cnt = 0;
-       pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
-       if (pv_hashed_free_count < PV_HASHED_LOW_WATER_MARK) {
-               for (i = 0; i < PV_HASHED_ALLOC_CHUNK; i++) {
-                       pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
-
-                       pvh_e->qlink.next = (queue_entry_t)pvh_eh;
-                       pvh_eh = pvh_e;
-
-                       if (pvh_et == PV_HASHED_ENTRY_NULL)
-                               pvh_et = pvh_e;
-                       pv_cnt++;
-               }
-               PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
-       }
-       mappingrecurse = 0;
-}
-
-
 void
 pmap_switch(pmap_t tpmap)
 {
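
mapping_free_prime() and mapping_adjust(), removed above (the pv machinery presumably moves with the rest of the shared pmap code), share one pattern: allocate a batch of pv entries from pv_hashed_list_zone, chain them head-to-tail, and splice the whole chain onto the relevant free list in a single PV_HASHED_FREE_LIST() or PV_HASHED_KERN_FREE_LIST() call. mapping_free_prime() seeds both lists up front (5 * PV_HASHED_ALLOC_CHUNK entries for the user list), while mapping_adjust() re-runs the batch from a thread call whenever a free count falls below its low-water mark. A condensed sketch of the pattern, with malloc() and plain pointers standing in for zalloc() and the kernel's queue machinery (all names here are illustrative):

#include <stdlib.h>

/* Hypothetical stand-ins for pv_hashed_entry_t and its free list. */
typedef struct pv_entry {
	struct pv_entry *next;          /* qlink.next analogue */
} pv_entry_t;

static pv_entry_t *pv_free_list;        /* free-list head */
static int         pv_free_count;       /* pv_hashed_free_count analogue */

#define ALLOC_CHUNK     16              /* PV_HASHED_ALLOC_CHUNK stand-in */
#define LOW_WATER_MARK   8              /* PV_HASHED_LOW_WATER_MARK stand-in */

/*
 * Allocate n entries and chain them head-to-tail, the way the removed
 * loops link pvh_eh..pvh_et, then splice the whole chain in at once.
 */
static void
pv_replenish(int n)
{
	pv_entry_t *head = NULL, *tail = NULL;
	int cnt = 0;

	for (int i = 0; i < n; i++) {
		pv_entry_t *e = malloc(sizeof(*e));  /* zalloc() in the kernel */
		if (e == NULL)
			break;
		e->next = head;                 /* pvh_e->qlink.next = pvh_eh */
		head = e;                       /* pvh_eh = pvh_e */
		if (tail == NULL)
			tail = e;               /* pvh_et set on first entry */
		cnt++;
	}
	if (head != NULL) {
		tail->next = pv_free_list;      /* one splice for the batch */
		pv_free_list = head;
		pv_free_count += cnt;
	}
}

/* mapping_adjust() analogue: top up when below the low-water mark. */
static void
pv_adjust(void)
{
	if (pv_free_count < LOW_WATER_MARK)
		pv_replenish(ALLOC_CHUNK);
}

Building the batch off-list and splicing it in one step is why the removed loops track both a head (pvh_eh) and a tail (pvh_et): the free list is touched once per chunk rather than once per entry.
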
index 4c5b5f07d0a4e9684ba471ba5890dc62783cf9f5..0e8749b2b2cdd56e4f5ab73936f95c78113721e6 100644 (file)
@@ -260,3 +260,9 @@ getval(
        *val = 1;
        return (NUM);
 }
+
+boolean_t 
+PE_imgsrc_mount_supported()
+{
+       return TRUE;
+}
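
PE_imgsrc_mount_supported() is new in this commit and, on this platform, simply reports TRUE. Presumably the imageboot path consults it before allowing an image-source mount; a hypothetical caller, not shown in this diff, might read:

	/* Hypothetical caller sketch -- not part of this diff. */
	if (!PE_imgsrc_mount_supported())
		return ENOTSUP;	/* platform does not allow imgsrc mounts */
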
index 6143820965c83e6ff5b3d7d5ce3f6ff0d895de6f..5a8fa5eff404a38874ddba79d85b82fcc97574ba 100644 (file)
@@ -184,15 +184,15 @@ void PE_init_platform(boolean_t vm_initialized, void * _args)
                /* Hack! FIXME.. */ 
         outb(0x21, 0xff);   /* Maskout all interrupts Pic1 */
         outb(0xa1, 0xff);   /* Maskout all interrupts Pic2 */
         if (PE_state.deviceTreeHead) {
             DTInit(PE_state.deviceTreeHead);
-    }
+        }
 
         pe_identify_machine(args);
     } else {
         pe_init_debug();
     }
 
+
 }
 
 void PE_create_console( void )
@@ -274,5 +274,8 @@ PE_stub_poll_input(__unused unsigned int options, char * c)
 int (*PE_poll_input)(unsigned int options, char * c)
        = PE_stub_poll_input;
 
-
-
+boolean_t
+PE_reboot_on_panic(void)
+{
+       return FALSE;
+}
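
PE_reboot_on_panic() pairs with the KERNEL_PRIVATE declaration added to the header below; returning FALSE keeps this platform's default of halting in the debugger on panic rather than restarting. A hedged sketch of a consumer (this diff only adds the hook; the PEHaltRestart() call here is an assumption for illustration):

	/* Hypothetical panic-path sketch -- not part of this diff. */
	if (PE_reboot_on_panic())
		PEHaltRestart(kPERestartCPU);	/* auto-restart instead of waiting */
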
index 3dd73dad7c419ea4a9287eb5f07255a6920c606a..7c3596f9f3a10fc252e23503a34a215395e5b67d 100644 (file)
@@ -189,6 +189,8 @@ typedef struct PE_Video       PE_Video;
 
 extern void initialize_screen(PE_Video *, unsigned int);
 
+extern void dim_screen(void);
+
 extern int PE_current_console(
        PE_Video *info);
 
@@ -275,6 +277,12 @@ extern void PE_cpu_machine_quiesce(
 
 extern void pe_init_debug(void);
 
+extern boolean_t PE_imgsrc_mount_supported(void);
+
+
+#if KERNEL_PRIVATE
+boolean_t PE_reboot_on_panic(void);
+#endif
 
 __END_DECLS