git.saurik.com Git - apple/xnu.git/commitdiff
xnu-1504.3.12.tar.gz mac-os-x-1063 v1504.3.12
author	Apple <opensource@apple.com>
Mon, 29 Mar 2010 19:14:35 +0000 (19:14 +0000)
committer	Apple <opensource@apple.com>
Mon, 29 Mar 2010 19:14:35 +0000 (19:14 +0000)
150 files changed:
bsd/conf/MASTER
bsd/conf/MASTER.i386
bsd/conf/MASTER.ppc
bsd/conf/MASTER.x86_64
bsd/conf/files
bsd/dev/i386/sysctl.c
bsd/hfs/hfs.h
bsd/hfs/hfs_cnode.c
bsd/hfs/hfs_cnode.h
bsd/hfs/hfs_readwrite.c
bsd/hfs/hfs_vfsops.c
bsd/hfs/hfs_vfsutils.c
bsd/hfs/hfs_vnops.c
bsd/hfs/hfs_xattr.c
bsd/hfs/hfscommon/BTree/BTreeAllocate.c
bsd/kern/imageboot.c
bsd/kern/kdebug.c
bsd/kern/kern_event.c
bsd/kern/kern_memorystatus.c
bsd/kern/kern_mman.c
bsd/kern/kern_resource.c
bsd/kern/kern_sysctl.c
bsd/kern/pthread_synch.c
bsd/kern/sys_generic.c
bsd/kern/syscalls.master
bsd/kern/uipc_usrreq.c
bsd/net/Makefile
bsd/net/bridge.c [deleted file]
bsd/net/bridge.h [deleted file]
bsd/net/bridgestp.c [new file with mode: 0644]
bsd/net/dlil.c
bsd/net/ether_at_pr_module.c
bsd/net/ether_if_module.c
bsd/net/ether_inet6_pr_module.c
bsd/net/ether_inet_pr_module.c
bsd/net/ethernet.h
bsd/net/if.h
bsd/net/if_bridge.c [new file with mode: 0644]
bsd/net/if_bridgevar.h [new file with mode: 0644]
bsd/net/if_ethersubr.c
bsd/net/if_llc.h
bsd/net/if_types.h
bsd/net/if_var.h
bsd/net/if_vlan.c
bsd/net/pf.c
bsd/net/pf_ioctl.c
bsd/net/pfvar.h
bsd/net/route.c
bsd/netinet/in_arp.c
bsd/netinet/ip_dummynet.c
bsd/netinet/ip_output.c
bsd/netinet/tcp_input.c
bsd/netinet6/in6.c
bsd/netinet6/in6_ifattach.c
bsd/netinet6/in6_proto.c
bsd/netinet6/ip6_input.c
bsd/netinet6/ip6_mroute.c
bsd/netinet6/ip6_mroute.h
bsd/netinet6/ip6_output.c
bsd/netinet6/ip6_var.h
bsd/netinet6/ipsec.c
bsd/netinet6/mld6.c
bsd/netinet6/nd6.c
bsd/netinet6/raw_ip6.c
bsd/nfs/nfs_socket.c
bsd/sys/buf_internal.h
bsd/sys/kern_memorystatus.h
bsd/sys/mount.h
bsd/sys/mount_internal.h
bsd/sys/pthread_internal.h
bsd/sys/resource.h
bsd/sys/socketvar.h
bsd/sys/sockio.h
bsd/sys/ubc_internal.h
bsd/sys/vnode_internal.h
bsd/vfs/vfs_bio.c
bsd/vfs/vfs_cluster.c
bsd/vfs/vfs_conf.c
bsd/vfs/vfs_subr.c
bsd/vfs/vfs_syscalls.c
config/Makefile
config/MasterVersion
config/generate_linker_exports.sh [new file with mode: 0755]
iokit/Kernel/IODMACommand.cpp
iokit/Kernel/IOServicePM.cpp
kgmacros
libkern/c++/OSKext.cpp
libkern/conf/MASTER
libkern/conf/files
libkern/kxld/Makefile
libkern/kxld/kxld_array.c
libkern/kxld/kxld_demangle.c [new file with mode: 0644]
libkern/kxld/kxld_demangle.h [new file with mode: 0644]
libkern/kxld/kxld_kext.c
libkern/kxld/kxld_util.h
libkern/kxld/kxld_vtable.c
libkern/libkern/OSAtomic.h
libkern/libkern/c++/OSKext.h
libkern/mkext.c
libkern/zlib/adler32.c
libkern/zlib/arm/adler32vec.s [new file with mode: 0644]
libkern/zlib/arm/inffastS.s [new file with mode: 0644]
libkern/zlib/inffast.c
makedefs/MakeInc.def
makedefs/MakeInc.rule
osfmk/conf/MASTER
osfmk/console/panic_dialog.c
osfmk/console/video_console.c
osfmk/i386/AT386/model_dep.c
osfmk/i386/cpu_capabilities.h
osfmk/i386/cpuid.c
osfmk/i386/cpuid.h
osfmk/i386/lapic.c
osfmk/i386/loose_ends.c
osfmk/i386/pmCPU.c
osfmk/i386/pmCPU.h
osfmk/i386/pmap.c
osfmk/i386/pmap.h
osfmk/i386/pmap_internal.h
osfmk/i386/pmap_x86_common.c
osfmk/ipc/ipc_kmsg.c
osfmk/ipc/ipc_kmsg.h
osfmk/ipc/ipc_port.c
osfmk/ipc/ipc_port.h
osfmk/ipc/mach_port.c
osfmk/kdp/kdp.c
osfmk/kdp/kdp_dyld.h [new file with mode: 0644]
osfmk/kdp/kdp_udp.c
osfmk/kern/debug.c
osfmk/kern/debug.h
osfmk/kern/processor.c
osfmk/kern/processor.h
osfmk/kern/sched.h
osfmk/kern/sched_prim.c
osfmk/kern/sched_prim.h
osfmk/kern/task_policy.c
osfmk/kern/thread.c
osfmk/kern/thread.h
osfmk/kern/thread_call.c
osfmk/mach/task_policy.h
osfmk/mach/vm_prot.h
osfmk/ppc/machine_routines.c
osfmk/vm/vm_fault.c
osfmk/vm/vm_map.c
osfmk/vm/vm_map.h
osfmk/x86_64/loose_ends.c
osfmk/x86_64/pmap.c
pexpert/gen/bootargs.c
pexpert/i386/pe_init.c
pexpert/pexpert/pexpert.h

index ec9ff0940714cc0501776ebf2d6655a06ffd386d..36c667094d078a4106b3a0bcfbb8c15e25d3d3ae 100644 (file)
@@ -195,6 +195,7 @@ options             QUOTA           # file system quotas            # <quota>
 options                REV_ENDIAN_FS   # Reverse Endian FS             # <revfs>
 options                NAMEDSTREAMS    # named stream vnop support     # <namedstreams>
 options                CONFIG_VOLFS    # volfs path support (legacy)   # <config_volfs>
+options                CONFIG_IMGSRC_ACCESS # source of imageboot dmg  # <config_imgsrc_access>
 
 #
 # NFS support
@@ -245,6 +246,8 @@ options                     randomipid              # <inet,randomipid>
 
 options                ZLIB            # inflate/deflate support       # <zlib>
 
+options                IF_BRIDGE                       # <if_bridge>
+
 makeoptions    LIBDRIVER = "libDriver_kern.o"                  # <libdriver>
 makeoptions    LIBOBJC   = "libkobjc.o"                        # <kernobjc>
 
@@ -306,6 +309,9 @@ options   CONFIG_VFS_NAMES=4096             # <medium>
 options   CONFIG_VFS_NAMES=3072                # <small,xsmall>
 options   CONFIG_VFS_NAMES=2048                # <bsmall>
 
+options   CONFIG_MAX_CLUSTERS=8                # <xlarge,large,medium>
+options   CONFIG_MAX_CLUSTERS=4                # <small,xsmall,bsmall>
+
 #
 #  configurable kauth credential related resources 
 #
@@ -409,6 +415,10 @@ options   CONFIG_EMBEDDED                  # <config_embedded>
 #
 options   CONFIG_ENFORCE_SIGNED_CODE           # <config_embedded>
 
+# support dynamic signing of code
+#
+options                CONFIG_DYNAMIC_CODE_SIGNING     # <dynamic_codesigning>
+
 #
 # code decryption... used on embedded for app protection
 # must be set in all the bsd/conf and osfmk/conf MASTER files
index 08eca2cbc943143890692ab2f963f8441dffd38c..b953aaed944fc41fc863b6b59b97fac929a9e93c 100644 (file)
@@ -45,8 +45,8 @@
 #  Standard Apple Research Configurations:
 #  -------- ----- -------- ---------------
 #  BASE =        [ intel mach medium config_dtrace vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue psynch ]
-#  FILESYS =    [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo union config_volfs hfs_compression ]
-#  NETWORKING =  [ inet inet6 compat_oldsock mrouting tcpdrop_synfin bpfilter ipdivert ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile netmibs bond vlan gif stf zlib randomipid ifnet_input_chk config_mbuf_jumbo ipflow ]
+#  FILESYS =    [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo union config_volfs hfs_compression config_imgsrc_access ]
+#  NETWORKING =  [ inet inet6 compat_oldsock tcpdrop_synfin bpfilter ipdivert ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile netmibs bond vlan gif stf zlib randomipid ifnet_input_chk config_mbuf_jumbo ipflow ]
 #  NFS =         [ nfsclient nfsserver ]
 #  VPN =         [ ipsec ]
 #  RELEASE =     [ BASE NETWORKING NFS VPN FILESYS libdriver ]
@@ -56,7 +56,7 @@
 #
 #  EMBEDDED_BASE =     [ intel mach bsmall vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue psynch ]
 #  EMBEDDED_FILESYS =  [ devfs hfs journaling fdesc fifo ]
-#  EMBEDDED_NET =      [ inet compat_oldsock mrouting tcpdrop_synfin bpfilter config_mbuf_noexpand ]
+#  EMBEDDED_NET =      [ inet compat_oldsock tcpdrop_synfin bpfilter config_mbuf_noexpand ]
 #  EMBEDDED =          [ EMBEDDED_BASE EMBEDDED_NET VPN EMBEDDED_FILESYS libdriver no_printf_str no_kprintf_str no_kdebug ]
 #  DEVELOPMENT =       [ EMBEDDED_BASE EMBEDDED_NET NFS VPN EMBEDDED_FILESYS libdriver netmibs development mach_assert config_dtrace ]
 #
index 2a084643342a11c4790b20dbe7a6091f52e878af..54ba3d5658f1db8673b80d75f3506ee5ef6858dd 100644 (file)
@@ -47,7 +47,7 @@
 #
 #  BASE =        [ ppc mach medium config_dtrace vol pst gdb noprofiling simple_clock kernstack sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue ]
 #  FILESYS =    [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo union config_volfs hfs_compression ]
-#  NETWORKING =  [ inet inet6 compat_oldsock mrouting tcpdrop_synfin bpfilter ipdivert ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile netmibs bond vlan gif stf zlib randomipid ifnet_input_chk ipflow ]
+#  NETWORKING =  [ inet inet6 compat_oldsock tcpdrop_synfin bpfilter ipdivert ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile netmibs bond vlan gif stf zlib randomipid ifnet_input_chk ipflow ]
 #  NFS =         [ nfsclient nfsserver ]
 #  VPN =         [ ipsec ]
 #  RELEASE =     [ BASE NETWORKING NFS VPN FILESYS libdriver ]
index dd1f24e9691951ae5a76f49448e3bab45b7211c3..3815e81f0814706dfc6267ccca35cfc17283e4a2 100644 (file)
@@ -45,8 +45,8 @@
 #  Standard Apple Research Configurations:
 #  -------- ----- -------- ---------------
 #  BASE =        [ intel mach medium config_dtrace vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue psynch ]
-#  FILESYS =    [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo union config_volfs hfs_compression ]
-#  NETWORKING =  [ inet inet6 compat_oldsock mrouting tcpdrop_synfin bpfilter ipdivert ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile netmibs bond vlan gif stf zlib randomipid ifnet_input_chk config_mbuf_jumbo ipflow ]
+#  FILESYS =    [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo union config_volfs hfs_compression config_imgsrc_access ]
+#  NETWORKING =  [ inet inet6 compat_oldsock tcpdrop_synfin bpfilter ipdivert ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile netmibs bond vlan gif stf zlib randomipid ifnet_input_chk config_mbuf_jumbo ipflow ]
 #  NFS =         [ nfsclient nfsserver ]
 #  VPN =         [ ipsec ]
 #  RELEASE =     [ BASE NETWORKING NFS VPN FILESYS libdriver ]
@@ -56,7 +56,7 @@
 #
 #  EMBEDDED_BASE =     [ intel mach bsmall vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue psynch ]
 #  EMBEDDED_FILESYS =  [ devfs hfs journaling fdesc fifo ]
-#  EMBEDDED_NET =      [ inet compat_oldsock mrouting tcpdrop_synfin bpfilter config_mbuf_noexpand ]
+#  EMBEDDED_NET =      [ inet compat_oldsock tcpdrop_synfin bpfilter config_mbuf_noexpand ]
 #  EMBEDDED =          [ EMBEDDED_BASE EMBEDDED_NET VPN EMBEDDED_FILESYS libdriver no_printf_str no_kprintf_str no_kdebug ]
 #  DEVELOPMENT =       [ EMBEDDED_BASE EMBEDDED_NET NFS VPN EMBEDDED_FILESYS libdriver netmibs development mach_assert ]
 #
index 95f856c21f260df3046830c93f4707b22740b30f..fce436ec6129ddda73ba3526163a28fcc8c9e753 100644 (file)
@@ -107,7 +107,7 @@ OPTIONS/ipfw2                               optional ipfw2
 OPTIONS/ipfirewall                     optional ipfirewall
 OPTIONS/ipv6firewall           optional ipv6firewall
 OPTIONS/tcpdebug                       optional tcpdebug
-OPTIONS/bridge                         optional bridge
+OPTIONS/if_bridge                      optional if_bridge
 OPTIONS/faith                          optional faith
 OPTIONS/gif                                    optional gif
 OPTIONS/netat                          optional netat
@@ -200,7 +200,8 @@ bsd/kern/decmpfs.c                  standard
 
 bsd/net/bpf.c                          optional bpfilter
 bsd/net/bpf_filter.c                   optional bpfilter
-bsd/net/bridge.c                       optional bridge
+bsd/net/if_bridge.c                    optional if_bridge
+bsd/net/bridgestp.c                    optional if_bridge
 bsd/net/bsd_comp.c                     optional ppp_bsdcomp
 bsd/net/if.c                           optional networking
 bsd/net/if_atmsubr.c                   optional atm
index 2b9609d530dff3b88078cf3283bb972e9903ad32..597a208c1714bf6643c778fb7b85e6c8438a8d81 100644 (file)
@@ -314,6 +314,12 @@ SYSCTL_PROC(_machdep_cpu_thermal, OID_AUTO, dynamic_acceleration,
            sizeof(boolean_t),
            cpu_thermal, "I", "Dynamic Acceleration Technology (Turbo Mode)");
 
+SYSCTL_PROC(_machdep_cpu_thermal, OID_AUTO, invariant_APIC_timer,
+           CTLTYPE_INT | CTLFLAG_RD, 
+           (void *)offsetof(cpuid_thermal_leaf_t, invariant_APIC_timer),
+           sizeof(boolean_t),
+           cpu_thermal, "I", "Invariant APIC Timer");
+
 SYSCTL_PROC(_machdep_cpu_thermal, OID_AUTO, thresholds,
            CTLTYPE_INT | CTLFLAG_RD, 
            (void *)offsetof(cpuid_thermal_leaf_t, thresholds),
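
The new machdep.cpu.thermal.invariant_APIC_timer node is read like any other machdep sysctl. As a hedged illustration (this program is not part of the commit; the OID string is inferred from the SYSCTL_PROC declaration above), a userland reader could look like:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int invariant = 0;
	size_t len = sizeof(invariant);

	/* OID name: parent _machdep_cpu_thermal plus the node name above. */
	if (sysctlbyname("machdep.cpu.thermal.invariant_APIC_timer",
	    &invariant, &len, NULL, 0) == -1) {
		perror("sysctlbyname");
		return 1;
	}
	printf("Invariant APIC timer: %d\n", invariant);
	return 0;
}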
index 67ecb6d1ee40219ba0bf70f86e0c973dfc13a3a5..beb10099f697171fad4420d58552dc58c2a59459 100644 (file)
@@ -755,7 +755,7 @@ extern int hfs_btsync(struct vnode *vp, int sync_transaction);
 extern void replace_desc(struct cnode *cp, struct cat_desc *cdp);
 
 extern int hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp,
-                       struct vnode **rvpp, int can_drop_lock);
+                       struct vnode **rvpp, int can_drop_lock, int error_on_unlinked);
 
 extern int hfs_update(struct vnode *, int);
 
index 7123f603fe7d6317ba9a7448de1426280959b309..c17c8d4ddbe0ac5bf2eb1d81727ad78b824507c7 100644 (file)
@@ -59,6 +59,10 @@ static void  hfs_reclaim_cnode(struct cnode *);
 
 static int hfs_isordered(struct cnode *, struct cnode *);
 
+inline int hfs_checkdeleted (struct cnode *cp) {
+       return ((cp->c_flag & (C_DELETED | C_NOEXISTS)) ? ENOENT : 0);
+}
+
 
 /*
  * Last reference to an cnode.  If necessary, write or delete it.
@@ -195,7 +199,7 @@ hfs_vnop_inactive(struct vnop_inactive_args *ap)
                if ((cp->c_blocks > 0) && (forkcount == 1) && (vp != cp->c_rsrc_vp)) {
                        struct vnode *rvp = NULLVP;
 
-                       error = hfs_vgetrsrc(hfsmp, vp, &rvp, FALSE);
+                       error = hfs_vgetrsrc(hfsmp, vp, &rvp, FALSE, FALSE);
                        if (error)
                                goto out;
                        /*
@@ -612,9 +616,15 @@ hfs_getnewvnode(
                return (ENOENT);
        }
 
-       /* Hardlinks may need an updated catalog descriptor */
-       if ((cp->c_flag & C_HARDLINK) && descp->cd_nameptr && descp->cd_namelen > 0) {
-               replace_desc(cp, descp);
+       /* 
+        * Hardlinks may need an updated catalog descriptor.  However, if
+        * the cnode has already been marked as open-unlinked (C_DELETED), then don't
+        * replace its descriptor. 
+        */
+       if (!(hfs_checkdeleted(cp))) {
+               if ((cp->c_flag & C_HARDLINK) && descp->cd_nameptr && descp->cd_namelen > 0) {
+                       replace_desc(cp, descp);
+               }
        }
        /* Check if we found a matching vnode */
        if (*vpp != NULL)
index 27c1b9a55c348d3a89e9fc094bc8cc67571bb4d2..9ffb9a8ca277bc95f178e61f1001b5bf863fad5e 100644 (file)
@@ -227,6 +227,16 @@ enum { kFinderInvisibleMask = 1 << 14 };
                         FTOC(fp)->c_rsrc_vp :                  \
                         FTOC(fp)->c_vp)
 
+/*
+ * This is a helper function used for determining whether or not a cnode has become open
+ * unlinked in between the time we acquired its vnode and the time we acquire the cnode lock
+ * to start manipulating it.  Due to the SMP nature of VFS, it is probably necessary to 
+ * use this function every time we acquire a cnode lock, as the contents of the cnode may have
+ * been modified in between the lookup and a VNOP.  Whether or not to call this is dependent
+ * upon the VNOP in question.  Sometimes it is OK to use an open-unlinked file, for example, when
+ * reading.  But other times, such as on the source of a VNOP_RENAME, it should be disallowed.
+ */
+int hfs_checkdeleted (struct cnode *cp);
 
 /*
  * Test for a resource fork
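
To make the intended call pattern concrete, here is a minimal sketch (example_vnop_body is a hypothetical caller, not part of the commit) of re-validating a cnode after winning its lock:

static int
example_vnop_body(struct vnode *vp)
{
	struct cnode *cp = VTOC(vp);
	int error;

	/* The vnode came from a lookup; the cnode is not yet locked. */
	if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK)))
		return (error);

	/* The file may have gone open-unlinked while we waited for the lock. */
	if ((error = hfs_checkdeleted(cp))) {
		hfs_unlock(cp);
		return (error);	/* ENOENT */
	}

	/* ... safe to manipulate the cnode here ... */
	hfs_unlock(cp);
	return (0);
}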
index 6dc30afad3270c1f85516de58e350d3a72a36f44..97578830da10d984f4270e50b54825fef5df0448 100644 (file)
@@ -3296,6 +3296,7 @@ hfs_vnop_pageout(struct vnop_pageout_args *ap)
        vm_offset_t     a_pl_offset;
        int             a_flags;
        int is_pageoutv2 = 0;
+       kern_return_t kret;
 
        cp = VTOC(vp);
        fp = VTOF(vp);
@@ -3339,9 +3340,9 @@ hfs_vnop_pageout(struct vnop_pageout_args *ap)
                else {
                        request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY;
                }
-               ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags); 
+               kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags); 
 
-               if (upl == (upl_t) NULL) {
+               if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
                        retval = EINVAL;
                        goto pageout_done;
                }
index 8148697b213dbbd6445697cb6efa64e87c2f0ec7..de087422b7e6bcdafa190b03879de361067c3e41 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 1999-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -114,6 +114,8 @@ int hfs_dbg_all = 0;
 int hfs_dbg_err = 0;
 #endif
 
+/* Enable/disable debugging code for live volume resizing */
+int hfs_resize_debug = 0;
 
 lck_grp_attr_t *  hfs_group_attr;
 lck_attr_t *  hfs_lock_attr;
@@ -146,8 +148,7 @@ static int hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context);
 static int hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, vfs_context_t context);
 
 static int hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t reclaimblks, vfs_context_t context);
-static int hfs_overlapped_overflow_extents(struct hfsmount *hfsmp, u_int32_t startblk,
-                                           u_int32_t catblks, u_int32_t fileID, int rsrcfork);
+static int hfs_overlapped_overflow_extents(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t fileID);
 static int hfs_journal_replay(vnode_t devvp, vfs_context_t context);
 
 
@@ -3803,17 +3804,18 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
        u_int32_t reclaimblks = 0;
        int lockflags = 0;
        int transaction_begun = 0;
+       Boolean updateFreeBlocks = false;
        int error;
 
-       lck_mtx_lock(&hfsmp->hfs_mutex);
+       HFS_MOUNT_LOCK(hfsmp, TRUE);    
        if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) {
-               lck_mtx_unlock(&hfsmp->hfs_mutex);
+               HFS_MOUNT_UNLOCK(hfsmp, TRUE);  
                return (EALREADY);
        }
        hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS;
        hfsmp->hfs_resize_filesmoved = 0;
        hfsmp->hfs_resize_totalfiles = 0;
-       lck_mtx_unlock(&hfsmp->hfs_mutex);
+       HFS_MOUNT_UNLOCK(hfsmp, TRUE);  
 
        /*
         * - Journaled HFS Plus volumes only.
@@ -3828,18 +3830,23 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
        newblkcnt = newsize / hfsmp->blockSize;
        reclaimblks = hfsmp->totalBlocks - newblkcnt;
 
+       if (hfs_resize_debug) {
+               printf ("hfs_truncatefs: old: size=%qu, blkcnt=%u, freeblks=%u\n", oldsize, hfsmp->totalBlocks, hfs_freeblks(hfsmp, 1));
+               printf ("hfs_truncatefs: new: size=%qu, blkcnt=%u, reclaimblks=%u\n", newsize, newblkcnt, reclaimblks);
+       }
+
        /* Make sure new size is valid. */
        if ((newsize < HFS_MIN_SIZE) ||
            (newsize >= oldsize) ||
            (newsize % hfsmp->hfs_logical_block_size) ||
            (newsize % hfsmp->hfs_physical_block_size)) {
-               printf ("hfs_truncatefs: invalid size\n");
+               printf ("hfs_truncatefs: invalid size (newsize=%qu, oldsize=%qu)\n", newsize, oldsize);
                error = EINVAL;
                goto out;
        }
-       /* Make sure there's enough space to work with. */
+       /* Make sure that the file system has enough free blocks to reclaim */
        if (reclaimblks >= hfs_freeblks(hfsmp, 1)) {
-               printf("hfs_truncatefs: insufficient space (need %u blocks; have %u blocks)\n", reclaimblks, hfs_freeblks(hfsmp, 1));
+               printf("hfs_truncatefs: insufficient space (need %u blocks; have %u free blocks)\n", reclaimblks, hfs_freeblks(hfsmp, 1));
                error = ENOSPC;
                goto out;
        }
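
The validation above reduces to block arithmetic. A standalone sketch of the same constraints (hypothetical helper, not in the commit):

static int
hfs_shrink_ok(u_int64_t oldsize, u_int64_t newsize,
    u_int32_t log_blksize, u_int32_t phys_blksize,
    u_int32_t reclaimblks, u_int32_t freeblks)
{
	/* The new size must be sane and aligned to both block sizes. */
	if ((newsize < HFS_MIN_SIZE) ||
	    (newsize >= oldsize) ||
	    (newsize % log_blksize) ||
	    (newsize % phys_blksize))
		return (EINVAL);

	/* Every block being truncated away must currently be free. */
	if (reclaimblks >= freeblks)
		return (ENOSPC);

	return (0);
}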
@@ -3862,17 +3869,21 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
         * in the allocation blocks beyond (i.e. the blocks we're trying to
  * truncate away).
         */
-       lck_mtx_lock(&hfsmp->hfs_mutex);
+       HFS_MOUNT_LOCK(hfsmp, TRUE);    
        if (hfsmp->blockSize == 512) 
                hfsmp->allocLimit = newblkcnt - 2;
        else
                hfsmp->allocLimit = newblkcnt - 1;
+       /* Update the volume free block count to reflect the total number of 
+        * free blocks that will exist after a successful resize.
+        */
        hfsmp->freeBlocks -= reclaimblks;
-       lck_mtx_unlock(&hfsmp->hfs_mutex);
-       
+       updateFreeBlocks = true;
+       HFS_MOUNT_UNLOCK(hfsmp, TRUE);  
+
        /*
         * Look for files that have blocks at or beyond the location of the
-        * new alternate volume header.
+        * new alternate volume header
         */
        if (hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks)) {
                /*
@@ -3883,8 +3894,9 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
                transaction_begun = 0;
 
                /* Attempt to reclaim some space. */ 
-               if (hfs_reclaimspace(hfsmp, hfsmp->allocLimit, reclaimblks, context) != 0) {
-                       printf("hfs_truncatefs: couldn't reclaim space on %s\n", hfsmp->vcbVN);
+               error = hfs_reclaimspace(hfsmp, hfsmp->allocLimit, reclaimblks, context);
+               if (error != 0) {
+                       printf("hfs_truncatefs: couldn't reclaim space on %s (error=%d)\n", hfsmp->vcbVN, error);
                        error = ENOSPC;
                        goto out;
                }
@@ -3895,8 +3907,9 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
                transaction_begun = 1;
                
                /* Check if we're clear now. */
-               if (hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks)) {
-                       printf("hfs_truncatefs: didn't reclaim enough space on %s\n", hfsmp->vcbVN);
+               error = hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks);
+               if (error != 0) {
+                       printf("hfs_truncatefs: didn't reclaim enough space on %s (error=%d)\n", hfsmp->vcbVN, error);
                        error = EAGAIN;  /* tell client to try again */
                        goto out;
                }
@@ -3933,14 +3946,16 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
         * since this block will be outside of the truncated file system!
         */
        if (hfsmp->hfs_alt_id_sector) {
-               if (buf_meta_bread(hfsmp->hfs_devvp, 
+               error = buf_meta_bread(hfsmp->hfs_devvp, 
                                HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
-                               hfsmp->hfs_physical_block_size, NOCRED, &bp) == 0) {
-       
+                               hfsmp->hfs_physical_block_size, NOCRED, &bp);
+               if (error == 0) {
                        bzero((void*)((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size)), kMDBSize);
                        (void) VNOP_BWRITE(bp);
-               } else if (bp) {
-                       buf_brelse(bp);
+               } else {
+                       if (bp) {
+                               buf_brelse(bp);
+                       }
                }
                bp = NULL;
        }
@@ -3963,7 +3978,7 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
        /*
         * TODO: Adjust the size of the metadata zone based on new volume size?
         */
-        
+       
        /*
         * Adjust the size of hfsmp->hfs_attrdata_vp
         */
@@ -3985,15 +4000,14 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
        }
        
 out:
-       if (error)
-               hfsmp->freeBlocks += reclaimblks;
-       
        lck_mtx_lock(&hfsmp->hfs_mutex);
+       if (error && (updateFreeBlocks == true)) 
+               hfsmp->freeBlocks += reclaimblks;
        hfsmp->allocLimit = hfsmp->totalBlocks;
        if (hfsmp->nextAllocation >= hfsmp->allocLimit)
                hfsmp->nextAllocation = hfsmp->hfs_metazone_end + 1;
        hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS;
-       lck_mtx_unlock(&hfsmp->hfs_mutex);
+       HFS_MOUNT_UNLOCK(hfsmp, TRUE);  
        
        if (lockflags) {
                hfs_systemfile_unlock(hfsmp, lockflags);
@@ -4001,6 +4015,8 @@ out:
        if (transaction_begun) {
                hfs_end_transaction(hfsmp);
                hfs_journal_flush(hfsmp);
+               /* Just to be sure, sync all data to the disk */
+               (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
        }
 
        return (error);
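
In outline, the free-block accounting now follows an optimistic-update pattern: decrement up front so allocators see the post-resize count, remember the decrement, and restore it under the same mount lock if the resize fails. A condensed sketch (not verbatim from the commit):

	Boolean updateFreeBlocks = false;

	HFS_MOUNT_LOCK(hfsmp, TRUE);
	hfsmp->freeBlocks -= reclaimblks;	/* assume the resize succeeds */
	updateFreeBlocks = true;
	HFS_MOUNT_UNLOCK(hfsmp, TRUE);

	/* ... reclaim space, rewrite volume headers ... */

out:
	lck_mtx_lock(&hfsmp->hfs_mutex);
	if (error && (updateFreeBlocks == true))
		hfsmp->freeBlocks += reclaimblks;	/* undo on failure */
	hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS;
	HFS_MOUNT_UNLOCK(hfsmp, TRUE);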
@@ -4077,18 +4093,6 @@ hfs_copy_extent(
        if (cp != hfsmp->hfs_allocation_cp && cp->c_lockowner != current_thread())
                panic("hfs_copy_extent: vp=%p (cp=%p) not owned?\n", vp, cp);
 
-       /*
-        * Wait for any in-progress writes to this vnode to complete, so that we'll
-        * be copying consistent bits.  (Otherwise, it's possible that an async
-        * write will complete to the old extent after we read from it.  That
-        * could lead to corruption.)
-        */
-       err = vnode_waitforwrites(vp, 0, 0, 0, "hfs_copy_extent");
-       if (err) {
-               printf("hfs_copy_extent: Error %d from vnode_waitforwrites\n", err);
-               return err;
-       }
-       
        /*
         * Determine the I/O size to use
         *
@@ -4134,7 +4138,7 @@ hfs_copy_extent(
                buf_setcount(bp, ioSize);
                buf_setblkno(bp, destSector);
                buf_setlblkno(bp, destSector);
-               if (journal_uses_fua(hfsmp->jnl))
+               if (vnode_issystem(vp) && journal_uses_fua(hfsmp->jnl))
                        buf_markfua(bp);
                        
                /* Do the write */
@@ -4157,7 +4161,7 @@ hfs_copy_extent(
                kmem_free(kernel_map, (vm_offset_t)buffer, bufferSize);
 
        /* Make sure all writes have been flushed to disk. */
-       if (!journal_uses_fua(hfsmp->jnl)) {
+       if (vnode_issystem(vp) && !journal_uses_fua(hfsmp->jnl)) {
                err = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
                if (err) {
                        printf("hfs_copy_extent: DKIOCSYNCHRONIZECACHE failed (%d)\n", err);
@@ -4172,8 +4176,15 @@ hfs_copy_extent(
 }
 
 
+static int
+hfs_relocate_callback(__unused HFSPlusExtentKey *key, HFSPlusExtentRecord *record, HFSPlusExtentRecord *state)
+{
+       bcopy(state, record, sizeof(HFSPlusExtentRecord));
+       return 0;
+}
+
 /*
- * Reclaim space at the end of a volume, used by a given system file.
+ * Reclaim space at the end of a volume, used by a given file.
  *
  * This routine attempts to move any extent which contains allocation blocks
  * at or after "startblk."  A separate transaction is used to do the move.
@@ -4182,109 +4193,191 @@ hfs_copy_extent(
  * of a transaction have their physical block numbers invalidated so they will
  * eventually be written to their new locations.
  *
- * This routine can be used to move overflow extents for the allocation file.
- *
  * Inputs:
  *    hfsmp       The volume being resized.
  *    startblk    Blocks >= this allocation block need to be moved.
  *    locks       Which locks need to be taken for the given system file.
  *    vp          The vnode for the system file.
  *
+ *    The caller of this function, hfs_reclaimspace(), grabs the cnode lock 
+ *    for non-system files before calling this function.  
+ *
  * Outputs:
- *    moved       Set to true if any extents were moved.
+ *    blks_moved  Total number of allocation blocks moved by this routine.
  */
 static int
-hfs_relocate_callback(__unused HFSPlusExtentKey *key, HFSPlusExtentRecord *record, HFSPlusExtentRecord *state)
-{
-       bcopy(state, record, sizeof(HFSPlusExtentRecord));
-       return 0;
-}
-static int
-hfs_reclaim_sys_file(struct hfsmount *hfsmp, struct vnode *vp, u_long startblk, int locks, Boolean *moved, vfs_context_t context)
+hfs_reclaim_file(struct hfsmount *hfsmp, struct vnode *vp, u_long startblk, int locks, u_int32_t *blks_moved, vfs_context_t context)
 {
        int error;
        int lockflags;
        int i;
        u_long datablks;
-       u_long block;
+       u_long end_block;
        u_int32_t oldStartBlock;
        u_int32_t newStartBlock;
-       u_int32_t blockCount;
+       u_int32_t oldBlockCount;
+       u_int32_t newBlockCount;
        struct filefork *fp;
-
+       struct cnode *cp;
+       int is_sysfile;
+       int took_truncate_lock = 0;
+       struct BTreeIterator *iterator = NULL;
+       u_int8_t forktype;
+       u_int32_t fileID;
+               
        /* If there is no vnode for this file, then there's nothing to do. */   
        if (vp == NULL)
                return 0;
 
-       /* printf("hfs_reclaim_sys_file: %.*s\n", VTOC(vp)->c_desc.cd_namelen, VTOC(vp)->c_desc.cd_nameptr); */
+       cp = VTOC(vp);
+       fileID = cp->c_cnid;
+       is_sysfile = vnode_issystem(vp);
+       forktype = VNODE_IS_RSRC(vp) ? 0xFF : 0;
+
+       /* Flush all the buffer cache blocks and cluster pages associated with 
+        * this vnode.  
+        *
+        * If the current vnode is a system vnode, all the buffer cache blocks 
+        * associated with it should already be sync'ed to the disk as part of 
+        * journal flush in hfs_truncatefs().  Normally there should not be 
+        * buffer cache blocks for regular files, but for objects like symlinks,
+        * we can have buffer cache blocks associated with the vnode.  Therefore
+        * we always call buf_flushdirtyblks().  Resource fork data for directory 
+        * hard links is written directly through the buffer cache of the device 
+        * vnode, which should also be sync'ed as part of the journal flush in 
+        * hfs_truncatefs().
+        * 
+        * Flushing cluster pages should be the normal case for regular files, 
+        * and really should not do anything for system files.  But just to be 
+        * sure that all blocks associated with this vnode are sync'ed to the 
+        * disk, we call both buffer cache and cluster layer functions.  
+        */
+       buf_flushdirtyblks(vp, MNT_NOWAIT, 0, "hfs_reclaim_file");
        
+       if (!is_sysfile) {
+               /* The caller grabs the cnode lock for non-system files only, therefore 
+                * we unlock only non-system files before calling the cluster layer.
+                */
+               hfs_unlock(cp);
+               hfs_lock_truncate(cp, TRUE);
+               took_truncate_lock = 1;
+       }
+       (void) cluster_push(vp, 0);
+       if (!is_sysfile) {
+               error = hfs_lock(cp, HFS_FORCE_LOCK);
+               if (error) {
+                       hfs_unlock_truncate(cp, TRUE);
+                       return error;
+               }
+
+               /* If the file no longer exists, nothing left to do */
+               if (cp->c_flag & C_NOEXISTS) {
+                       hfs_unlock_truncate(cp, TRUE);
+                       return 0;
+               }
+       }
+
+       /* Wait for any in-progress writes to this vnode to complete, so that we'll
+        * be copying consistent bits.  (Otherwise, it's possible that an async
+        * write will complete to the old extent after we read from it.  That
+        * could lead to corruption.)
+        */
+       error = vnode_waitforwrites(vp, 0, 0, 0, "hfs_reclaim_file");
+       if (error) {
+               printf("hfs_reclaim_file: Error %d from vnode_waitforwrites\n", error);
+               return error;
+       }
+
+       if (hfs_resize_debug) {
+               printf("hfs_reclaim_file: Start relocating %sfork for fileid=%u name=%.*s\n", (forktype ? "rsrc" : "data"), fileID, cp->c_desc.cd_namelen, cp->c_desc.cd_nameptr);
+       }
+
        /* We always need the allocation bitmap and extents B-tree */
        locks |= SFL_BITMAP | SFL_EXTENTS;
        
        error = hfs_start_transaction(hfsmp);
        if (error) {
-               printf("hfs_reclaim_sys_file: hfs_start_transaction returned %d\n", error);
+               printf("hfs_reclaim_file: hfs_start_transaction returned %d\n", error);
+               if (took_truncate_lock) {
+                       hfs_unlock_truncate(cp, TRUE);
+               }
                return error;
        }
        lockflags = hfs_systemfile_lock(hfsmp, locks, HFS_EXCLUSIVE_LOCK);
        fp = VTOF(vp);
        datablks = 0;
+       *blks_moved = 0;
 
        /* Relocate non-overflow extents */
        for (i = 0; i < kHFSPlusExtentDensity; ++i) {
                if (fp->ff_extents[i].blockCount == 0)
                        break;
                oldStartBlock = fp->ff_extents[i].startBlock;
-               blockCount = fp->ff_extents[i].blockCount;
-               datablks += blockCount;
-               block = oldStartBlock + blockCount;
-               if (block > startblk) {
-                       error = BlockAllocate(hfsmp, 1, blockCount, blockCount, true, true, &newStartBlock, &blockCount);
+               oldBlockCount = fp->ff_extents[i].blockCount;
+               datablks += oldBlockCount;
+               end_block = oldStartBlock + oldBlockCount;
+               /* Check if the file overlaps the target space */
+               if (end_block > startblk) {
+                       /* Allocate a new extent */
+                       error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, true, (is_sysfile ? true : false), &newStartBlock, &newBlockCount);
                        if (error) {
-                               printf("hfs_reclaim_sys_file: BlockAllocate returned %d\n", error);
+                               printf("hfs_reclaim_file: BlockAllocate (error=%d) for fileID=%u %u:(%u,%u)\n", error, fileID, i, oldStartBlock, oldBlockCount);
                                goto fail;
                        }
-                       if (blockCount != fp->ff_extents[i].blockCount) {
-                               printf("hfs_reclaim_sys_file: new blockCount=%u, original blockCount=%u", blockCount, fp->ff_extents[i].blockCount);
-                               goto free_fail;
+                       if (newBlockCount != oldBlockCount) {
+                               printf("hfs_reclaim_file: fileID=%u - newBlockCount=%u, oldBlockCount=%u", fileID, newBlockCount, oldBlockCount);
+                               if (BlockDeallocate(hfsmp, newStartBlock, newBlockCount)) {
+                                       hfs_mark_volume_inconsistent(hfsmp);
+                               }
+                               goto fail;
                        }
-                       error = hfs_copy_extent(hfsmp, vp, oldStartBlock, newStartBlock, blockCount, context);
+
+                       /* Copy data from old location to new location */
+                       error = hfs_copy_extent(hfsmp, vp, oldStartBlock, newStartBlock, newBlockCount, context);
                        if (error) {
-                               printf("hfs_reclaim_sys_file: hfs_copy_extent returned %d\n", error);
-                               goto free_fail;
+                               printf("hfs_reclaim_file: hfs_copy_extent error=%d for fileID=%u %u:(%u,%u) to %u:(%u,%u)\n", error, fileID, i, oldStartBlock, oldBlockCount, i, newStartBlock, newBlockCount);
+                               if (BlockDeallocate(hfsmp, newStartBlock, newBlockCount)) {
+                                       hfs_mark_volume_inconsistent(hfsmp);
+                               }
+                               goto fail;
                        }
                        fp->ff_extents[i].startBlock = newStartBlock;
-                       VTOC(vp)->c_flag |= C_MODIFIED;
-                       *moved = true;
-                       error = BlockDeallocate(hfsmp, oldStartBlock, blockCount);
+                       cp->c_flag |= C_MODIFIED;
+                       *blks_moved += newBlockCount;
+
+                       /* Deallocate the old extent */
+                       error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount);
                        if (error) {
-                               /* TODO: Mark volume inconsistent? */
-                               printf("hfs_reclaim_sys_file: BlockDeallocate returned %d\n", error);
+                               printf("hfs_reclaim_file: BlockDeallocate returned %d\n", error);
+                               hfs_mark_volume_inconsistent(hfsmp);
                                goto fail;
                        }
-                       error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
-                       if (error) {
-                               /* TODO: Mark volume inconsistent? */
-                               printf("hfs_reclaim_sys_file: hfs_flushvolumeheader returned %d\n", error);
-                               goto fail;
+
+                       /* If this is a system file, sync the volume header on disk */
+                       if (is_sysfile) {
+                               error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
+                               if (error) {
+                                       printf("hfs_reclaim_file: hfs_flushvolumeheader returned %d\n", error);
+                                       hfs_mark_volume_inconsistent(hfsmp);
+                                       goto fail;
+                               }
+                       }
+
+                       if (hfs_resize_debug) {
+                               printf ("hfs_reclaim_file: Relocated %u:(%u,%u) to %u:(%u,%u)\n", i, oldStartBlock, oldBlockCount, i, newStartBlock, newBlockCount);
                        }
                }
        }
 
        /* Relocate overflow extents (if any) */
        if (i == kHFSPlusExtentDensity && fp->ff_blocks > datablks) {
-               struct BTreeIterator *iterator = NULL;
                struct FSBufferDescriptor btdata;
                HFSPlusExtentRecord record;
                HFSPlusExtentKey *key;
                FCB *fcb;
-               u_int32_t fileID;
-               u_int8_t forktype;
+               int overflow_count = 0;
 
-               forktype = VNODE_IS_RSRC(vp) ? 0xFF : 0;
-               fileID = VTOC(vp)->c_cnid;
                if (kmem_alloc(kernel_map, (vm_offset_t*) &iterator, sizeof(*iterator))) {
-                       printf("hfs_reclaim_sys_file: kmem_alloc failed!\n");
+                       printf("hfs_reclaim_file: kmem_alloc failed!\n");
                        error = ENOMEM;
                        goto fail;
                }
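
Every extent, resident or overflow, goes through the same allocate/copy/switch/free sequence. One iteration, condensed into a hypothetical helper (not part of the commit; error handling abbreviated):

static int
relocate_one_extent(struct hfsmount *hfsmp, struct vnode *vp,
    u_int32_t *startp, u_int32_t count, vfs_context_t ctx)
{
	u_int32_t oldStart = *startp;
	u_int32_t newStart, newCount;
	int error;

	/* Allocate a same-sized replacement extent. */
	error = BlockAllocate(hfsmp, 1, count, count, true, true,
	    &newStart, &newCount);
	if (error)
		return (error);

	if ((newCount != count) ||
	    (error = hfs_copy_extent(hfsmp, vp, oldStart, newStart,
	    newCount, ctx)) != 0) {
		/* Roll back the fresh allocation before bailing out. */
		if (BlockDeallocate(hfsmp, newStart, newCount))
			hfs_mark_volume_inconsistent(hfsmp);
		return (error ? error : ENOSPC);
	}

	/* Point the extent at its new home; the caller persists the record. */
	*startp = newStart;

	/* Free the vacated blocks; a failure here leaves the volume dirty. */
	if ((error = BlockDeallocate(hfsmp, oldStart, count)))
		hfs_mark_volume_inconsistent(hfsmp);
	return (error);
}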
@@ -4305,40 +4398,51 @@ hfs_reclaim_sys_file(struct hfsmount *hfsmp, struct vnode *vp, u_long startblk,
                error = BTSearchRecord(fcb, iterator, &btdata, NULL, iterator);
                while (error == 0) {
                        /* Stop when we encounter a different file or fork. */
-                       if ((key->fileID != fileID) ||
-                               (key->forkType != forktype)) {
+                       if ((key->fileID != fileID) || 
+                           (key->forkType != forktype)) {
                                break;
                        }
+               
+                       /* Just track the overflow extent record number for debugging... */
+                       if (hfs_resize_debug) {
+                               overflow_count++;
+                       }
+
                        /* 
                         * Check if the file overlaps target space.
                         */
                        for (i = 0; i < kHFSPlusExtentDensity; ++i) {
                                if (record[i].blockCount == 0) {
-                                       goto overflow_done;
+                                       goto fail;
                                }
                                oldStartBlock = record[i].startBlock;
-                               blockCount = record[i].blockCount;
-                               block = oldStartBlock + blockCount;
-                               if (block > startblk) {
-                                       error = BlockAllocate(hfsmp, 1, blockCount, blockCount, true, true, &newStartBlock, &blockCount);
+                               oldBlockCount = record[i].blockCount;
+                               end_block = oldStartBlock + oldBlockCount;
+                               if (end_block > startblk) {
+                                       error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, true, (is_sysfile ? true : false), &newStartBlock, &newBlockCount);
                                        if (error) {
-                                               printf("hfs_reclaim_sys_file: BlockAllocate returned %d\n", error);
-                                               goto overflow_done;
+                                               printf("hfs_reclaim_file: BlockAllocate (error=%d) for fileID=%u %u:(%u,%u)\n", error, fileID, i, oldStartBlock, oldBlockCount);
+                                               goto fail;
                                        }
-                                       if (blockCount != record[i].blockCount) {
-                                               printf("hfs_reclaim_sys_file: new blockCount=%u, original blockCount=%u", blockCount, fp->ff_extents[i].blockCount);
-                                               kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
-                                               goto free_fail;
+                                       if (newBlockCount != oldBlockCount) {
+                                               printf("hfs_reclaim_file: fileID=%u - newBlockCount=%u, oldBlockCount=%u", fileID, newBlockCount, oldBlockCount);
+                                               if (BlockDeallocate(hfsmp, newStartBlock, newBlockCount)) {
+                                                       hfs_mark_volume_inconsistent(hfsmp);
+                                               }
+                                               goto fail;
                                        }
-                                       error = hfs_copy_extent(hfsmp, vp, oldStartBlock, newStartBlock, blockCount, context);
+                                       error = hfs_copy_extent(hfsmp, vp, oldStartBlock, newStartBlock, newBlockCount, context);
                                        if (error) {
-                                               printf("hfs_reclaim_sys_file: hfs_copy_extent returned %d\n", error);
-                                               kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
-                                               goto free_fail;
+                                               printf("hfs_reclaim_file: hfs_copy_extent error=%d for fileID=%u (%u,%u) to (%u,%u)\n", error, fileID, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount);
+                                               if (BlockDeallocate(hfsmp, newStartBlock, newBlockCount)) {
+                                                       hfs_mark_volume_inconsistent(hfsmp);
+                                               }
+                                               goto fail;
                                        }
                                        record[i].startBlock = newStartBlock;
-                                       VTOC(vp)->c_flag |= C_MODIFIED;
-                                       *moved = true;
+                                       cp->c_flag |= C_MODIFIED;
+                                       *blks_moved += newBlockCount;
+
                                        /*
                                         * NOTE: To support relocating overflow extents of the
                                         * allocation file, we must update the BTree record BEFORE
@@ -4349,15 +4453,18 @@ hfs_reclaim_sys_file(struct hfsmount *hfsmp, struct vnode *vp, u_long startblk,
                                         */
                                        error = BTUpdateRecord(fcb, iterator, (IterateCallBackProcPtr) hfs_relocate_callback, &record);
                                        if (error) {
-                                               /* TODO: Mark volume inconsistent? */
-                                               printf("hfs_reclaim_sys_file: BTUpdateRecord returned %d\n", error);
-                                               goto overflow_done;
+                                               printf("hfs_reclaim_file: BTUpdateRecord returned %d\n", error);
+                                               hfs_mark_volume_inconsistent(hfsmp);
+                                               goto fail;
                                        }
-                                       error = BlockDeallocate(hfsmp, oldStartBlock, blockCount);
+                                       error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount);
                                        if (error) {
-                                               /* TODO: Mark volume inconsistent? */
-                                               printf("hfs_reclaim_sys_file: BlockDeallocate returned %d\n", error);
-                                               goto overflow_done;
+                                               printf("hfs_reclaim_file: BlockDeallocate returned %d\n", error);
+                                               hfs_mark_volume_inconsistent(hfsmp);
+                                               goto fail;
+                                       }
+                                       if (hfs_resize_debug) {
+                                               printf ("hfs_reclaim_file: Relocated overflow#%d %u:(%u,%u) to %u:(%u,%u)\n", overflow_count, i, oldStartBlock, oldBlockCount, i, newStartBlock, newBlockCount);
                                        }
                                }
                        }
@@ -4368,26 +4475,29 @@ hfs_reclaim_sys_file(struct hfsmount *hfsmp, struct vnode *vp, u_long startblk,
                                break;
                        }
                }
-overflow_done:
-               kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
-               if (error) {
-                       goto fail;
-               }
        }
        
-       hfs_systemfile_unlock(hfsmp, lockflags);
-       error = hfs_end_transaction(hfsmp);
-       if (error) {
-               printf("hfs_reclaim_sys_file: hfs_end_transaction returned %d\n", error);
+fail:
+       if (iterator) {
+               kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
        }
 
-       return error;
-
-free_fail:
-       (void) BlockDeallocate(hfsmp, newStartBlock, blockCount);
-fail:
        (void) hfs_systemfile_unlock(hfsmp, lockflags);
+
+       if ((*blks_moved != 0) && (is_sysfile == false)) {
+               (void) hfs_update(vp, MNT_WAIT);
+       }
+
        (void) hfs_end_transaction(hfsmp);
+
+       if (took_truncate_lock) {
+               hfs_unlock_truncate(cp, TRUE);
+       }
+
+       if (hfs_resize_debug) {
+               printf("hfs_reclaim_file: Finished relocating %sfork for fileid=%u (error=%d)\n", (forktype ? "rsrc" : "data"), fileID, error);
+       }
+
        return error;
 }
 
@@ -4453,6 +4563,7 @@ hfs_reclaim_journal_file(struct hfsmount *hfsmp, vfs_context_t context)
 {
        int error;
        int lockflags;
+       u_int32_t oldStartBlock;
        u_int32_t newStartBlock;
        u_int32_t oldBlockCount;
        u_int32_t newBlockCount;
@@ -4493,6 +4604,7 @@ hfs_reclaim_journal_file(struct hfsmount *hfsmp, vfs_context_t context)
                printf("hfs_reclaim_journal_file: cat_idlookup returned %d\n", error);
                goto free_fail;
        }
+       oldStartBlock = journal_fork.cf_extents[0].startBlock;
        journal_fork.cf_size = newBlockCount * hfsmp->blockSize;
        journal_fork.cf_extents[0].startBlock = newStartBlock;
        journal_fork.cf_extents[0].blockCount = newBlockCount;
@@ -4524,6 +4636,9 @@ hfs_reclaim_journal_file(struct hfsmount *hfsmp, vfs_context_t context)
                printf("hfs_reclaim_journal_file: hfs_end_transaction returned %d\n", error);
        }
        
+       if (!error && hfs_resize_debug) {
+               printf ("hfs_reclaim_journal_file: Successfully relocated journal from (%u,%u) to (%u,%u)\n", oldStartBlock, oldBlockCount, newStartBlock, newBlockCount);
+       }
        return error;
 
 free_fail:
@@ -4531,6 +4646,9 @@ free_fail:
 fail:
        hfs_systemfile_unlock(hfsmp, lockflags);
        (void) hfs_end_transaction(hfsmp);
+       if (hfs_resize_debug) {
+               printf ("hfs_reclaim_journal_file: Error relocating journal file (error=%d)\n", error);
+       }
        return error;
 }
 
@@ -4545,6 +4663,7 @@ hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, vfs_context_t context)
 {
        int error;
        int lockflags;
+       u_int32_t oldBlock;
        u_int32_t newBlock;
        u_int32_t blockCount;
        struct cat_desc jib_desc;
@@ -4608,6 +4727,7 @@ hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, vfs_context_t context)
                printf("hfs_reclaim_journal_file: cat_idlookup returned %d\n", error);
                goto fail;
        }
+       oldBlock = jib_fork.cf_extents[0].startBlock;
        jib_fork.cf_size = hfsmp->blockSize;
        jib_fork.cf_extents[0].startBlock = newBlock;
        jib_fork.cf_extents[0].blockCount = 1;
@@ -4635,6 +4755,10 @@ hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, vfs_context_t context)
        if (error) {
                printf("hfs_reclaim_journal_info_block: journal_flush returned %d\n", error);
        }
+
+       if (!error && hfs_resize_debug) {
+               printf ("hfs_reclaim_journal_info_block: Successfully relocated journal info block from (%u,%u) to (%u,%u)\n", oldBlock, blockCount, newBlock, blockCount);
+       }
        return error;
 
 free_fail:
@@ -4642,12 +4766,19 @@ free_fail:
 fail:
        hfs_systemfile_unlock(hfsmp, lockflags);
        (void) hfs_end_transaction(hfsmp);
+       if (hfs_resize_debug) {
+               printf ("hfs_reclaim_journal_info_block: Error relocating journal info block (error=%d)\n", error);
+       }
        return error;
 }
 
 
 /*
  * Reclaim space at the end of a file system.
+ *
+ * Inputs - 
+ *     startblk        - start block of the space being reclaimed
+ *     reclaimblks     - number of allocation blocks to reclaim
  */
 static int
 hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t reclaimblks, vfs_context_t context)
@@ -4663,45 +4794,53 @@ hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t reclaimbl
        int filecnt = 0;
        int maxfilecnt;
        u_int32_t block;
-       u_int32_t datablks;
-       u_int32_t rsrcblks;
-       u_int32_t blkstomove = 0;
        int lockflags;
-       int i;
+       int i, j;
        int error;
        int lastprogress = 0;
-       Boolean system_file_moved = false;
+       u_int32_t blks_moved = 0;
+       u_int32_t total_blks_moved = 0;
+       Boolean need_relocate;
 
        /* Relocate extents of the Allocation file if they're in the way. */
-       error = hfs_reclaim_sys_file(hfsmp, hfsmp->hfs_allocation_vp, startblk, SFL_BITMAP, &system_file_moved, context);
+       error = hfs_reclaim_file(hfsmp, hfsmp->hfs_allocation_vp, startblk, SFL_BITMAP, &blks_moved, context);
        if (error) {
                printf("hfs_reclaimspace: reclaim allocation file returned %d\n", error);
                return error;
        }
+       total_blks_moved += blks_moved;
+
        /* Relocate extents of the Extents B-tree if they're in the way. */
-       error = hfs_reclaim_sys_file(hfsmp, hfsmp->hfs_extents_vp, startblk, SFL_EXTENTS, &system_file_moved, context);
+       error = hfs_reclaim_file(hfsmp, hfsmp->hfs_extents_vp, startblk, SFL_EXTENTS, &blks_moved, context);
        if (error) {
                printf("hfs_reclaimspace: reclaim extents b-tree returned %d\n", error);
                return error;
        }
+       total_blks_moved += blks_moved;
+
        /* Relocate extents of the Catalog B-tree if they're in the way. */
-       error = hfs_reclaim_sys_file(hfsmp, hfsmp->hfs_catalog_vp, startblk, SFL_CATALOG, &system_file_moved, context);
+       error = hfs_reclaim_file(hfsmp, hfsmp->hfs_catalog_vp, startblk, SFL_CATALOG, &blks_moved, context);
        if (error) {
                printf("hfs_reclaimspace: reclaim catalog b-tree returned %d\n", error);
                return error;
        }
+       total_blks_moved += blks_moved;
+
        /* Relocate extents of the Attributes B-tree if they're in the way. */
-       error = hfs_reclaim_sys_file(hfsmp, hfsmp->hfs_attribute_vp, startblk, SFL_ATTRIBUTE, &system_file_moved, context);
+       error = hfs_reclaim_file(hfsmp, hfsmp->hfs_attribute_vp, startblk, SFL_ATTRIBUTE, &blks_moved, context);
        if (error) {
                printf("hfs_reclaimspace: reclaim attribute b-tree returned %d\n", error);
                return error;
        }
+       total_blks_moved += blks_moved;
+
        /* Relocate extents of the Startup File if there is one and they're in the way. */
-       error = hfs_reclaim_sys_file(hfsmp, hfsmp->hfs_startup_vp, startblk, SFL_STARTUP, &system_file_moved, context);
+       error = hfs_reclaim_file(hfsmp, hfsmp->hfs_startup_vp, startblk, SFL_STARTUP, &blks_moved, context);
        if (error) {
                printf("hfs_reclaimspace: reclaim startup file returned %d\n", error);
                return error;
        }
+       total_blks_moved += blks_moved;
        
        /*
         * We need to make sure the alternate volume header gets flushed if we moved
@@ -4709,12 +4848,13 @@ hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t reclaimbl
         * shrinking the size of the volume, or else the journal code will panic
         * with an invalid (too large) block number.
         *
-        * Note that system_file_moved will be set if ANY extent was moved, even
+        * Note that total_blks_moved will be non-zero if ANY extent was moved, even
         * if it was just an overflow extent.  In this case, the journal_flush isn't
         * strictly required, but shouldn't hurt.
         */
-       if (system_file_moved)
+       if (total_blks_moved) {
                hfs_journal_flush(hfsmp);
+       }
 
        if (hfsmp->jnl_start + (hfsmp->jnl_size / hfsmp->blockSize) > startblk) {
                error = hfs_reclaim_journal_file(hfsmp, context);
@@ -4745,6 +4885,7 @@ hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t reclaimbl
        }       
 
        saved_next_allocation = hfsmp->nextAllocation;
+       /* Always try allocating new blocks after the metadata zone */
        HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_start);
 
        fcb = VTOF(hfsmp->hfs_catalog_vp);
@@ -4763,7 +4904,8 @@ hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t reclaimbl
        }
        /*
         * Iterate over all the catalog records looking for files
-        * that overlap into the space we're trying to free up.
+        * that overlap into the space we're trying to free up, and count 
+        * the total number of blocks that will require relocation.
         */
        for (filecnt = 0; filecnt < maxfilecnt; ) {
                error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
@@ -4776,58 +4918,64 @@ hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t reclaimbl
                if (filerec.recordType != kHFSPlusFileRecord) {
                        continue;
                }
-               datablks = rsrcblks = 0;
-               /* 
-                * Check if either fork overlaps target space.
-                */
+
+               need_relocate = false;
+               /* Check if data fork overlaps the target space */
                for (i = 0; i < kHFSPlusExtentDensity; ++i) {
-                       if (filerec.dataFork.extents[i].blockCount != 0) {
-                               datablks += filerec.dataFork.extents[i].blockCount;
-                               block = filerec.dataFork.extents[i].startBlock +
-                                               filerec.dataFork.extents[i].blockCount;
-                               if (block >= startblk) {
-                                       if ((filerec.fileID == hfsmp->hfs_jnlfileid) ||
-                                               (filerec.fileID == hfsmp->hfs_jnlinfoblkid)) {
-                                               printf("hfs_reclaimspace: cannot move active journal\n");
-                                               error = EPERM;
-                                               goto end_iteration;
-                                       }
-                                       cnidbufp[filecnt++] = filerec.fileID;
-                                       blkstomove += filerec.dataFork.totalBlocks;
-                                       break;
-                               }
+                       if (filerec.dataFork.extents[i].blockCount == 0) {
+                               break;
                        }
-                       if (filerec.resourceFork.extents[i].blockCount != 0) {
-                               rsrcblks += filerec.resourceFork.extents[i].blockCount;
-                               block = filerec.resourceFork.extents[i].startBlock +
-                                               filerec.resourceFork.extents[i].blockCount;
-                               if (block >= startblk) {
-                                       cnidbufp[filecnt++] = filerec.fileID;
-                                       blkstomove += filerec.resourceFork.totalBlocks;
-                                       break;
+                       block = filerec.dataFork.extents[i].startBlock +
+                               filerec.dataFork.extents[i].blockCount;
+                       if (block >= startblk) {
+                               if ((filerec.fileID == hfsmp->hfs_jnlfileid) ||
+                                   (filerec.fileID == hfsmp->hfs_jnlinfoblkid)) {
+                                       printf("hfs_reclaimspace: cannot move active journal\n");
+                                       error = EPERM;
+                                       goto end_iteration;
                                }
+                               need_relocate = true;
+                               goto save_fileid;
                        }
                }
-               /*
-                * Check for any overflow extents that overlap.
-                */
-               if (i == kHFSPlusExtentDensity) {
-                       if (filerec.dataFork.totalBlocks > datablks) {
-                               if (hfs_overlapped_overflow_extents(hfsmp, startblk, datablks, filerec.fileID, 0)) {
-                                       cnidbufp[filecnt++] = filerec.fileID;
-                                       blkstomove += filerec.dataFork.totalBlocks;
-                               }
-                       } else if (filerec.resourceFork.totalBlocks > rsrcblks) {
-                               if (hfs_overlapped_overflow_extents(hfsmp, startblk, rsrcblks, filerec.fileID, 1)) {
-                                       cnidbufp[filecnt++] = filerec.fileID;
-                                       blkstomove += filerec.resourceFork.totalBlocks;
-                               }
+
+               /* Check if resource fork overlaps the target space */
+               for (j = 0; j < kHFSPlusExtentDensity; ++j) {
+                       if (filerec.resourceFork.extents[j].blockCount == 0) {
+                               break;
+                       }
+                       block = filerec.resourceFork.extents[j].startBlock +
+                               filerec.resourceFork.extents[j].blockCount;
+                       if (block >= startblk) {
+                               need_relocate = true;
+                               goto save_fileid;
+                       }
+               }
+
+               /* Check if any forks' overflow extents overlap the target space */
+               if ((i == kHFSPlusExtentDensity) || (j == kHFSPlusExtentDensity)) {
+                       if (hfs_overlapped_overflow_extents(hfsmp, startblk, filerec.fileID)) {
+                               need_relocate = true;
+                               goto save_fileid;
+                       }
+               }
+
+save_fileid:
+               if (need_relocate == true) {
+                       cnidbufp[filecnt++] = filerec.fileID;
+                       if (hfs_resize_debug) {
+                               printf ("hfs_reclaimspace: Will relocate extents for fileID=%u\n", filerec.fileID);
                        }
                }
        }
 
 end_iteration:
-       if (filecnt == 0 && !system_file_moved) {
+       /* If no regular file was found to be relocated and 
+        * no system file was moved, we probably do not have 
+        * enough space to relocate the system files, or 
+        * something else went wrong.
+        */
+       if ((filecnt == 0) && (total_blks_moved == 0)) {
                printf("hfs_reclaimspace: no files moved\n");
                error = ENOSPC;
        }
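
The rewritten loops reduce each fork to one question per extent: does its end fall at or beyond the first block being reclaimed? A stand-alone illustration of that test (extent_needs_relocation is a hypothetical helper written for this note, not part of the patch):

    #include <stdio.h>
    #include <stdint.h>

    /* An extent covering [startBlock, startBlock + blockCount) must move
     * when its end reaches the reclaimed region; a zero blockCount marks
     * the end of the extent record, as in the loops above. */
    static int
    extent_needs_relocation(uint32_t startBlock, uint32_t blockCount,
                            uint32_t reclaim_startblk)
    {
            if (blockCount == 0)
                    return 0;
            return (startBlock + blockCount) >= reclaim_startblk;
    }

    int
    main(void)
    {
            /* suppose blocks >= 1000 are being reclaimed */
            printf("%d\n", extent_needs_relocation(10, 50, 1000));   /* 0 */
            printf("%d\n", extent_needs_relocation(990, 20, 1000));  /* 1 */
            return 0;
    }
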
@@ -4836,66 +4984,52 @@ end_iteration:
        if (error || filecnt == 0)
                goto out;
 
-       /*
-        * Double check space requirements to make sure
-        * there is enough space to relocate any files
-        * that reside in the reclaim area.
-        *
-        *                                          Blocks To Move --------------
-        *                                                            |    |    |
-        *                                                            V    V    V
-        * ------------------------------------------------------------------------
-        * |                                                        | /   ///  // |
-        * |                                                        | /   ///  // |
-        * |                                                        | /   ///  // |
-        * ------------------------------------------------------------------------
-        *
-        * <------------------- New Total Blocks ------------------><-- Reclaim -->
-        *
-        * <------------------------ Original Total Blocks ----------------------->
-        *
-        */
-       if (blkstomove >= hfs_freeblks(hfsmp, 1)) {
-               printf("hfs_truncatefs: insufficient space (need %u blocks; have %u blocks)\n", blkstomove, hfs_freeblks(hfsmp, 1));
-               error = ENOSPC;
-               goto out;
-       }
        hfsmp->hfs_resize_filesmoved = 0;
        hfsmp->hfs_resize_totalfiles = filecnt;
        
        /* Now move any files that are in the way. */
        for (i = 0; i < filecnt; ++i) {
-               struct vnode * rvp;
-        struct cnode * cp;
+               struct vnode *rvp;
+               struct cnode *cp;
+               struct filefork *datafork;
 
                if (hfs_vget(hfsmp, cnidbufp[i], &vp, 0) != 0)
                        continue;
+               
+               cp = VTOC(vp);
+               datafork = VTOF(vp);
 
-        /* Relocating directory hard links is not supported, so we
-         * punt (see radar 6217026). */
-        cp = VTOC(vp);
-        if ((cp->c_flag & C_HARDLINK) && vnode_isdir(vp)) {
-            printf("hfs_reclaimspace: unable to relocate directory hard link %d\n", cp->c_cnid);
-            error = EINVAL;
-            goto out;
-        }
-
-               /* Relocate any data fork blocks. */
-               if (VTOF(vp) && VTOF(vp)->ff_blocks > 0) {
-                       error = hfs_relocate(vp, hfsmp->hfs_metazone_end + 1, kauth_cred_get(), current_proc());
+               /* Relocating directory hard links is not supported, so we punt (see radar 6217026). */
+               if ((cp->c_flag & C_HARDLINK) && vnode_isdir(vp)) {
+                       printf("hfs_reclaimspace: Unable to relocate directory hard link id=%d\n", cp->c_cnid);
+                       error = EINVAL;
+                       goto out;
                }
-               if (error) 
-                       break;
 
-               /* Relocate any resource fork blocks. */
-               if ((cp->c_blocks - (VTOF(vp) ? VTOF((vp))->ff_blocks : 0)) > 0) {
-                       error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE);
-                       if (error)
+               /* Relocate any overlapping data fork blocks. */
+               if (datafork && datafork->ff_blocks > 0) {
+                       error = hfs_reclaim_file(hfsmp, vp, startblk, 0, &blks_moved, context);
+                       if (error)  {
+                               printf ("hfs_reclaimspace: Error reclaiming datafork blocks of fileid=%u (error=%d)\n", cnidbufp[i], error);
                                break;
-                       error = hfs_relocate(rvp, hfsmp->hfs_metazone_end + 1, kauth_cred_get(), current_proc());
+                       }
+                       total_blks_moved += blks_moved;
+               }
+
+               /* Relocate any overlapping resource fork blocks. */
+               if ((cp->c_blocks - (datafork ? datafork->ff_blocks : 0)) > 0) {
+                       error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, TRUE);
+                       if (error) {
+                               printf ("hfs_reclaimspace: Error looking up rvp for fileid=%u (error=%d)\n", cnidbufp[i], error);
+                               break;
+                       }
+                       error = hfs_reclaim_file(hfsmp, rvp, startblk, 0, &blks_moved, context);
                        VTOC(rvp)->c_flag |= C_NEED_RVNODE_PUT;
-                       if (error)
+                       if (error) {
+                               printf ("hfs_reclaimspace: Error reclaiming rsrcfork blocks of fileid=%u (error=%d)\n", cnidbufp[i], error);
                                break;
+                       }
+                       total_blks_moved += blks_moved;
                }
                hfs_unlock(cp);
                vnode_put(vp);
@@ -4920,8 +5054,8 @@ end_iteration:
                vp = NULL;
        }
        if (hfsmp->hfs_resize_filesmoved != 0) {
-               printf("hfs_reclaimspace: relocated %d files on \"%s\"\n",
-                      (int)hfsmp->hfs_resize_filesmoved, hfsmp->vcbVN);
+               printf("hfs_reclaimspace: relocated %u blocks from %d files on \"%s\"\n",
+                       total_blks_moved, (int)hfsmp->hfs_resize_filesmoved, hfsmp->vcbVN);
        }
 out:
        kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
@@ -4939,32 +5073,34 @@ out:
 
 
 /*
- * Check if there are any overflow extents that overlap.
+ * Check if there are any overflow data or resource fork extents that overlap 
+ * into the disk space that is being reclaimed.  
+ *
+ * Output - 
+ *     1 - At least one overflow extent needs to be relocated
+ *     0 - No overflow extents need to be relocated, or there was an error
  */
 static int
-hfs_overlapped_overflow_extents(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t catblks, u_int32_t fileID, int rsrcfork)
+hfs_overlapped_overflow_extents(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t fileID)
 {
        struct BTreeIterator * iterator = NULL;
        struct FSBufferDescriptor btdata;
        HFSPlusExtentRecord extrec;
        HFSPlusExtentKey *extkeyptr;
        FCB *fcb;
-       u_int32_t block;
-       u_int8_t forktype;
        int overlapped = 0;
        int i;
        int error;
 
-       forktype = rsrcfork ? 0xFF : 0;
        if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
-               return (0);
+               return 0;
        }       
        bzero(iterator, sizeof(*iterator));
        extkeyptr = (HFSPlusExtentKey *)&iterator->key;
        extkeyptr->keyLength = kHFSPlusExtentKeyMaximumLength;
-       extkeyptr->forkType = forktype;
+       extkeyptr->forkType = 0;
        extkeyptr->fileID = fileID;
-       extkeyptr->startBlock = catblks;
+       extkeyptr->startBlock = 0;
 
        btdata.bufferAddress = &extrec;
        btdata.itemSize = sizeof(extrec);
@@ -4972,32 +5108,41 @@ hfs_overlapped_overflow_extents(struct hfsmount *hfsmp, u_int32_t startblk, u_in
        
        fcb = VTOF(hfsmp->hfs_extents_vp);
 
+       /* This will position the iterator just before the first overflow 
+        * extent record for the given fileID.  It will always return btNotFound,
+        * so we special-case that error code.
+        */
        error = BTSearchRecord(fcb, iterator, &btdata, NULL, iterator);
+       if (error && (error != btNotFound)) {
+               goto out;
+       }
+
+       /* BTIterateRecord() might return an error if the btree is empty; in
+        * that case we simply report to the caller that no extents overlap.
+        */
+       error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
        while (error == 0) {
                /* Stop when we encounter a different file. */
-               if ((extkeyptr->fileID != fileID) ||
-                   (extkeyptr->forkType != forktype)) {
+               if (extkeyptr->fileID != fileID) {
                        break;
                }
-               /* 
-                * Check if the file overlaps target space.
-                */
+               /* Check if any extents in this record overlap the target space. */
                for (i = 0; i < kHFSPlusExtentDensity; ++i) {
                        if (extrec[i].blockCount == 0) {
                                break;
                        }
-                       block = extrec[i].startBlock + extrec[i].blockCount;
-                       if (block >= startblk) {
+                       if ((extrec[i].startBlock + extrec[i].blockCount) >= startblk) {
                                overlapped = 1;
-                               break;
+                               goto out;
                        }
                }
                /* Look for more records. */
                error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
        }
 
+out:
        kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
-       return (overlapped);
+       return overlapped;
 }
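
The simplified key setup works because overflow extent records sort by fileID first and forkType second (data fork 0x00 ahead of resource fork 0xFF), so a single forward scan starting at (fileID, forkType 0, startBlock 0) and stopping when fileID changes covers both forks. A hedged sketch of the search-then-iterate idiom, with the record inspection stubbed out:

    /* Sketch only: BTSearchRecord positions the iterator just before the
     * first overflow record for fileID and is expected to fail with
     * btNotFound; BTIterateRecord then walks forward in key order. */
    error = BTSearchRecord(fcb, iterator, &btdata, NULL, iterator);
    if (error && error != btNotFound)
            goto out;

    for (error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
         error == 0 && extkeyptr->fileID == fileID;
         error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL)) {
            /* ... inspect extrec[] for overlap, as in the loop above ... */
    }
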
 
 
index 2485c73f6a283b7a74e4747954346c3d8b1d274a..307e2db66da929be6ca51322f2d9e242a9726229 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -1493,7 +1493,7 @@ hfs_freeblks(struct hfsmount * hfsmp, int wantreserve)
        /*
         * We don't bother taking the mount lock
         * to look at these values since the values
-        * themselves are each updated automically
+        * themselves are each updated atomically
         * on aligned addresses.
         */
        freeblks = hfsmp->freeBlocks;
index 9114d0a994b28693a59b356b34c42cef0c2f2676..eda49e24284694d5f7e030bbdde147adc97b5b6f 100644 (file)
@@ -812,8 +812,14 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap)
                                
                                if (cp->c_blocks - VTOF(vp)->ff_blocks) {
                                        /* We deal with rsrc fork vnode iocount at the end of the function */
-                                       error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE);
+                                       error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, TRUE);
                                        if (error) {
+                                               /* 
+                                                * hfs_vgetrsrc may have returned a vnode in rvp even though
+                                                * we got an error, because we specified error_on_unlinked.
+                                                * We need to drop the iocount after we release the cnode lock, so
+                                                * it will be taken care of at the end of the function if it's needed.
+                                                */
                                                goto out;
                                        }
                                        
@@ -2263,11 +2269,15 @@ hfs_vnop_remove(ap)
                if ((error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK))) {
                        return (error);
                }
-
-               error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE);
+               error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, TRUE);
                hfs_unlock(cp);
                if (error) {
-                       return (error); 
+                       /* We may have gotten a rsrc vp out even though we got an error back. */
+                       if (rvp) {
+                               vnode_put(rvp);
+                               rvp = NULL;
+                       }
+                       return error;
                }
                drop_rsrc_vnode = 1;
        }
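
This hunk is the calling contract that the new error_on_unlinked argument imposes: hfs_vgetrsrc may hand back a referenced rsrc vnode even when it returns an error, and that iocount may only be dropped after the cnode lock is released. A condensed caller sketch (assumed context, not verbatim):

    error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE /* can_drop_lock */,
                         TRUE /* error_on_unlinked */);
    hfs_unlock(cp);                   /* release the cnode lock first... */
    if (error) {
            if (rvp) {
                    vnode_put(rvp);   /* ...then drop any iocount we were handed */
                    rvp = NULL;
            }
            return error;
    }
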
@@ -2670,10 +2680,17 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
                if (error && error != ENXIO && error != ENOENT && truncated) {
                        if ((cp->c_datafork && cp->c_datafork->ff_size != 0) ||
                                        (cp->c_rsrcfork && cp->c_rsrcfork->ff_size != 0)) {
+                               off_t data_size = 0;
+                               off_t rsrc_size = 0;
+                               if (cp->c_datafork) {
+                                       data_size = cp->c_datafork->ff_size;
+                               }
+                               if (cp->c_rsrcfork) {
+                                       rsrc_size = cp->c_rsrcfork->ff_size;
+                               }
                                printf("hfs: remove: couldn't delete a truncated file (%s)" 
                                                "(error %d, data sz %lld; rsrc sz %lld)",
-                                       cp->c_desc.cd_nameptr, error, cp->c_datafork->ff_size, 
-                                       cp->c_rsrcfork->ff_size);
+                                       cp->c_desc.cd_nameptr, error, data_size, rsrc_size);
                                hfs_mark_volume_inconsistent(hfsmp);
                        } else {
                                printf("hfs: remove: strangely enough, deleting truncated file %s (%d) got err %d\n",
@@ -2850,10 +2867,17 @@ hfs_vnop_rename(ap)
                if ((error = hfs_lock (VTOC(fvp), HFS_EXCLUSIVE_LOCK))) {
                        return (error);
                }
-
-               error = hfs_vgetrsrc(VTOHFS(fvp), fvp, &fvp_rsrc, TRUE);
+               
+               /*
+                * We care if we race against rename/delete with this cnode, so we'll
+                * error out if this file becomes open-unlinked during this call.
+                */
+               error = hfs_vgetrsrc(VTOHFS(fvp), fvp, &fvp_rsrc, TRUE, TRUE);
                hfs_unlock (VTOC(fvp));
                if (error) {
+                       if (fvp_rsrc) {
+                               vnode_put (fvp_rsrc);
+                       }
                        return error;
                }
        }
@@ -2865,13 +2889,30 @@ hfs_vnop_rename(ap)
                 * grab the resource fork if the lock succeeded.
                 */
                if (hfs_lock (VTOC(tvp), HFS_EXCLUSIVE_LOCK) == 0) {
-                       error = hfs_vgetrsrc(VTOHFS(tvp), tvp, &tvp_rsrc, TRUE);
-                       hfs_unlock (VTOC(tvp));
+                       tcp = VTOC(tvp);
+                       
+                       /* 
+                        * We only care if we get an open-unlinked file on the dst so we 
+                        * know to null out tvp/tcp to make the rename operation act 
+                        * as if they never existed.  Because they're effectively out of the
+                        * namespace already, it's fine to do this.  If that happens, be sure
+                        * to unlock the cnode first and drop the iocount only after the unlock.
+                        */
+                       error = hfs_vgetrsrc(VTOHFS(tvp), tvp, &tvp_rsrc, TRUE, TRUE);
+                       hfs_unlock (tcp);
                        if (error) {
-                               if (fvp_rsrc) {
-                                       vnode_put (fvp_rsrc);
+                               /*
+                                * Since we specify TRUE for error-on-unlinked in hfs_vgetrsrc,
+                                * we can get a rsrc fork vp even if it returns an error.
+                                */
+                               tcp = NULL;
+                               tvp = NULL;
+                               if (tvp_rsrc) {
+                                       vnode_put (tvp_rsrc);
+                                       tvp_rsrc = NULLVP;
                                }
-                               return error;
+                               /* just bypass truncate lock and act as if we never got tcp/tvp */
+                               goto retry;
                        }
                }
        }
@@ -4282,22 +4323,48 @@ exit:
 }
 
 
-/*
- * Return a referenced vnode for the resource fork
- *
- * cnode for vnode vp must already be locked.
- *
- * can_drop_lock is true if its safe to temporarily drop/re-acquire the cnode lock
+
+/* hfs_vgetrsrc acquires a resource fork vnode corresponding to the cnode found
+ * in 'vp'.  The rsrc fork vnode is returned with the cnode locked and an iocount
+ * held on the rsrc vnode.
+ * 
+ * *rvpp is an output argument for returning the pointer to the resource fork vnode.
+ * In most cases, the resource fork vnode will not be set if we return an error. 
+ * However, if error_on_unlinked is set, we may have already acquired the resource fork vnode
+ * before we discover the error (the file has gone open-unlinked).  In this case only,
+ * we may return a vnode in the output argument despite an error.
+ * 
+ * If can_drop_lock is set, then it is safe for this function to temporarily drop
+ * and then re-acquire the cnode lock.  We may need to do this, for example, in order to 
+ * acquire an iocount or promote our lock.  
+ * 
+ * error_on_unlinked indicates that we are to return an error if we discover
+ * that the cnode has gone into an open-unlinked state (C_DELETED or C_NOEXISTS
+ * is set in the cnode flags).  This is only necessary if can_drop_lock is true; otherwise
+ * there's really no reason to double-check for errors on the cnode.
  */
+
 __private_extern__
 int
-hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, struct vnode **rvpp, int can_drop_lock)
+hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, 
+               struct vnode **rvpp, int can_drop_lock, int error_on_unlinked)
 {
        struct vnode *rvp;
        struct vnode *dvp = NULLVP;
        struct cnode *cp = VTOC(vp);
        int error;
        int vid;
+       int delete_status = 0;
+
+
+       /*
+        * Need to check the status of the cnode to validate it hasn't
+        * gone open-unlinked on us before we can actually do work with it.
+        */
+       delete_status = hfs_checkdeleted (cp);
+       if ((delete_status) && (error_on_unlinked)) {
+               return delete_status;
+       }
 
 restart:
 	/* Attempt to use existing vnode */
@@ -4324,6 +4391,32 @@ restart:
 
                if (can_drop_lock) {
                        (void) hfs_lock(cp, HFS_FORCE_LOCK);
+
+                       /*
+                        * When we relinquished our cnode lock, the cnode could have raced
+                        * with a delete and gotten deleted.  If the caller did not want
+                        * us to ignore open-unlinked files, then re-check the C_DELETED
+                        * state and see if we need to return an ENOENT here because the item
+                        * got deleted in the intervening time.
+                        */
+                       if (error_on_unlinked) {
+                               if ((delete_status = hfs_checkdeleted(cp))) {
+                                       /* 
+                                        * If error == 0, this means that we succeeded in acquiring an iocount on the 
+                                        * rsrc fork vnode.  However, if we're in this block of code, that 
+                                        * means that we noticed that the cnode has gone open-unlinked.  In 
+                                        * this case, the caller requested that we not do any other work and 
+                                        * return an errno.  The caller will be responsible for dropping the 
+                                        * iocount we just acquired because we can't do it until we've released 
+                                        * the cnode lock.  
+                                        */
+                                       if (error == 0) {
+                                               *rvpp = rvp;
+                                       }
+                                       return delete_status;
+                               }
+                       }
+
                        /*
                         * When our lock was relinquished, the resource fork
                         * could have been recycled.  Check for this and try
@@ -4359,7 +4452,7 @@ restart:
                                return (EINVAL);
                        }
                        /*
-                        * If the upgrade fails we loose the lock and
+                        * If the upgrade fails we lose the lock and
                         * have to take the exclusive lock on our own.
                         */
                        if (lck_rw_lock_shared_to_exclusive(&cp->c_rwlock) == FALSE)
@@ -4372,9 +4465,17 @@ restart:
                 * C_DELETED.  This is because we need to continue to provide rsrc
                 * fork access to open-unlinked files.  In this case, build a fake descriptor
                 * like in hfs_removefile.  If we don't do this, buildkey will fail in
-                * cat_lookup because this cnode has no name in its descriptor.
+                * cat_lookup because this cnode has no name in its descriptor. However,
+                * only do this if the caller did not specify that they wanted us to
+                * error out upon encountering open-unlinked files.
                 */
 
+               if ((error_on_unlinked) && (can_drop_lock)) {
+                       if ((error = hfs_checkdeleted (cp))) {
+                               return error;
+                       }
+               }
+
                if ((cp->c_flag & C_DELETED ) && (cp->c_desc.cd_namelen == 0)) {
                        bzero (&to_desc, sizeof(to_desc));
                        bzero (delname, 32);
index 915fbe874ad56ab6d29623112c33caf8a079a1df..f552b9c7558ff0e6378e42c755fc3aa7a1cea902 100644 (file)
@@ -141,7 +141,7 @@ hfs_vnop_getnamedstream(struct vnop_getnamedstream_args* ap)
                hfs_unlock(cp);
                return (ENOATTR);
        }
-       error = hfs_vgetrsrc(VTOHFS(vp), vp, svpp, TRUE);
+       error = hfs_vgetrsrc(VTOHFS(vp), vp, svpp, TRUE, FALSE);
        hfs_unlock(cp);
 
        return (error);
@@ -184,7 +184,7 @@ hfs_vnop_makenamedstream(struct vnop_makenamedstream_args* ap)
        if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
                return (error);
        }
-       error = hfs_vgetrsrc(VTOHFS(vp), vp, svpp, TRUE);
+       error = hfs_vgetrsrc(VTOHFS(vp), vp, svpp, TRUE, FALSE);
        hfs_unlock(cp);
 
        return (error);
@@ -328,7 +328,7 @@ hfs_vnop_getxattr(struct vnop_getxattr_args *ap)
                                openunlinked = 1;
                        }
                        
-                       result = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE);
+                       result = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, FALSE);
                        hfs_unlock(cp);
                        if (result) {
                                return (result);
@@ -719,7 +719,7 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap)
                        openunlinked = 1;
                }
 
-               result = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE);
+               result = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, FALSE);
                hfs_unlock(cp);
                if (result) {
                        return (result);
@@ -1096,7 +1096,7 @@ hfs_vnop_removexattr(struct vnop_removexattr_args *ap)
                        hfs_unlock(cp);
                        return (ENOATTR);
                }
-               result = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE);
+               result = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, FALSE);
                hfs_unlock(cp);
                if (result) {
                        return (result);
@@ -2302,9 +2302,9 @@ free_attr_blks(struct hfsmount *hfsmp, int blkcnt, HFSPlusExtentDescriptor *exte
                        break;
                }
                (void)BlockDeallocate(hfsmp, extents[i].startBlock, extents[i].blockCount);
+               remblks -= extents[i].blockCount;
                extents[i].startBlock = 0;
                extents[i].blockCount = 0;
-               remblks -= extents[i].blockCount;
 
 #if HFS_XATTR_VERBOSE
                printf("hfs: free_attr_blks: BlockDeallocate [%d, %d]\n",
index 64c7b86f0cde969922909410077f313f9b1470d7..99d586408da2425df6fc365de6c9f5f0beecd042 100644 (file)
@@ -696,6 +696,18 @@ BTZeroUnusedNodes(FCB *filePtr)
                                                goto ErrorExit;
                                        }
                                        
+                                       if (buf_flags(bp) & B_LOCKED) {
+                                               /* 
+                                                * This node is already part of a transaction and will be
+                                                * written when the transaction is committed, so don't write it
+                                                * here.  If we did, we'd hit a panic in hfs_vnop_bwrite since
+                                                * B_LOCKED is still set.
+                                                */
+                                               buf_brelse(bp);
+                                               continue;
+                                       }
+
                                        buf_clear(bp);
                                        buf_markaged(bp);
                                        
index 6325962b2cfe1fc2b08c0ec248822361a30fffec..0ed79dc69acc3cfcf564ab0ab0b7fda0099a0fe6 100644 (file)
@@ -123,8 +123,8 @@ imageboot_setup()
        error = vfs_mountroot();
 
        if (error == 0 && rootvnode != NULL) {
-               struct vnode *tvp;
-               struct vnode *newdp;
+               vnode_t newdp, old_rootvnode;
+               mount_t new_rootfs, old_rootfs;
 
                /*
                 * Get the vnode for '/'.
@@ -133,17 +133,45 @@ imageboot_setup()
                if (VFS_ROOT(TAILQ_LAST(&mountlist,mntlist), &newdp, vfs_context_kernel()))
                        panic("%s: cannot find root vnode", __FUNCTION__);
 
+               old_rootvnode = rootvnode;
+               old_rootfs = rootvnode->v_mount;
+
+               mount_list_remove(old_rootfs);
+
+               mount_lock(old_rootfs);
+#ifdef CONFIG_IMGSRC_ACCESS
+               old_rootfs->mnt_kern_flag |= MNTK_BACKS_ROOT;
+#endif /* CONFIG_IMGSRC_ACCESS */
+               old_rootfs->mnt_flag &= ~MNT_ROOTFS;
+               mount_unlock(old_rootfs);
+
+               rootvnode = newdp;
+
+               new_rootfs = rootvnode->v_mount;
+               mount_lock(new_rootfs);
+               new_rootfs->mnt_flag |= MNT_ROOTFS;
+               mount_unlock(new_rootfs);
+
                vnode_ref(newdp);
                vnode_put(newdp);
-               tvp = rootvnode;
-               vnode_rele(tvp);
                filedesc0.fd_cdir = newdp;
-               rootvnode = newdp;
-               mount_list_lock();
-               TAILQ_REMOVE(&mountlist, TAILQ_FIRST(&mountlist), mnt_list);
-               mount_list_unlock();
-               mountlist.tqh_first->mnt_flag |= MNT_ROOTFS;
                DBG_TRACE("%s: root switched\n", __FUNCTION__);
+
+#ifdef CONFIG_IMGSRC_ACCESS
+               if (PE_imgsrc_mount_supported()) {
+                       imgsrc_rootvnode = old_rootvnode;
+               } else {
+                       vnode_getalways(old_rootvnode);
+                       vnode_rele(old_rootvnode);
+                       vnode_put(old_rootvnode);
+               }
+#else 
+               vnode_getalways(old_rootvnode);
+               vnode_rele(old_rootvnode);
+               vnode_put(old_rootvnode);
+#endif /* CONFIG_IMGSRC_ACCESS */
+
        }
 done:
        FREE_ZONE(root_path, MAXPATHLEN, M_NAMEI);
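
The closing vnode_getalways/vnode_rele/vnode_put sequence deserves a note: the usecount on the old root has been held since boot, and it can only be dropped safely while an iocount pins the vnode. A minimal sketch of that ordering (calls as in the patch):

    vnode_getalways(old_rootvnode);   /* take an iocount unconditionally */
    vnode_rele(old_rootvnode);        /* drop the usecount held since boot */
    vnode_put(old_rootvnode);         /* drop our temporary iocount */
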
index ee97c249c125c54487ad3a96d5755fb5b26d692c..bc3089a8fe6108d0acdbf4b77fc702fc81b0f9c4 100644 (file)
@@ -168,10 +168,10 @@ static lck_mtx_t        stackshot_subsys_mutex;
 void *stackshot_snapbuf = NULL;
 
 int
-stack_snapshot2(pid_t pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t options, int32_t *retval);
+stack_snapshot2(pid_t pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, uint32_t dispatch_offset, int32_t *retval);
 
 extern void
-kdp_snapshot_preflight(int pid, void  *tracebuf, uint32_t tracebuf_size, uint32_t options);
+kdp_snapshot_preflight(int pid, void  *tracebuf, uint32_t tracebuf_size, uint32_t flags, uint32_t dispatch_offset);
 
 extern int
 kdp_stack_snapshot_geterror(void);
@@ -1705,11 +1705,11 @@ stack_snapshot(struct proc *p, register struct stack_snapshot_args *uap, int32_t
                 return(error);
 
        return stack_snapshot2(uap->pid, uap->tracebuf, uap->tracebuf_size,
-           uap->options, retval);
+           uap->flags, uap->dispatch_offset, retval);
 }
 
 int
-stack_snapshot2(pid_t pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t options, int32_t *retval)
+stack_snapshot2(pid_t pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, uint32_t dispatch_offset, int32_t *retval)
 {
        int error = 0;
        unsigned bytesTraced = 0;
@@ -1730,7 +1730,7 @@ stack_snapshot2(pid_t pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_
                goto error_exit;
        }
 /* Preload trace parameters*/  
-       kdp_snapshot_preflight(pid, stackshot_snapbuf, tracebuf_size, options);
+       kdp_snapshot_preflight(pid, stackshot_snapbuf, tracebuf_size, flags, dispatch_offset);
 
 /* Trap to the debugger to obtain a coherent stack snapshot; this populates
  * the trace buffer
index 1a0f609ca828a718151f96e7156ecaa605a85eb0..5d195dcf0d628e111cd4dbaec3529a842b84fc8e 100644 (file)
@@ -1564,6 +1564,17 @@ kevent_register(struct kqueue *kq, struct kevent64_s *kev, __unused struct proc
                                knote_enqueue(kn);
                }
 
+               /*
+                * The user may change some filter values after the
+                * initial EV_ADD, but doing so will not reset any
+                * filter which has already been triggered.
+                */
+               kn->kn_kevent.udata = kev->udata;
+               if (fops->f_isfd || fops->f_touch == NULL) {
+                       kn->kn_sfflags = kev->fflags;
+                       kn->kn_sdata = kev->data;
+               }
+
                /*
                 * If somebody is in the middle of dropping this
                 * knote - go find/insert a new one.  But we have
@@ -1578,17 +1589,11 @@ kevent_register(struct kqueue *kq, struct kevent64_s *kev, __unused struct proc
                }
 
                /*
-                * The user may change some filter values after the
-                * initial EV_ADD, but doing so will not reset any 
-                * filter which have already been triggered.
+                * Call the touch routine to notify the filter of
+                * changes in filter values.
                 */
-               kn->kn_kevent.udata = kev->udata;
                if (!fops->f_isfd && fops->f_touch != NULL)
                        fops->f_touch(kn, kev, EVENT_REGISTER);
-               else {
-                       kn->kn_sfflags = kev->fflags;
-                       kn->kn_sdata = kev->data;
-               }
 
                /* We may need to push some info down to a networked filesystem */
                if (kn->kn_filter == EVFILT_VNODE) {
@@ -1680,13 +1685,10 @@ knote_process(struct knote      *kn,
                                }
 
                                /* capture the kevent data - using touch if specified */
-                               if (result) {
-                                       if (touch) {
-                                               kn->kn_fop->f_touch(kn, &kev, EVENT_PROCESS);
-                                       } else {
-                                               kev = kn->kn_kevent;
-                                       }
+                               if (result && touch) {
+                                       kn->kn_fop->f_touch(kn, &kev, EVENT_PROCESS);
                                }
+
                                /* convert back to a kqlock - bail if the knote went away */
                                if (!knoteuse2kqlock(kq, kn)) {
                                        return EJUSTRETURN;
@@ -1695,6 +1697,12 @@ knote_process(struct knote       *kn,
                                        if (!(kn->kn_status & KN_ACTIVE)) {
                                                knote_activate(kn, 0);
                                        }
+
+                                       /* capture all events that occurred during filter */
+                                       if (!touch) {
+                                               kev = kn->kn_kevent;
+                                       }
+
                                } else if ((kn->kn_status & KN_STAYQUEUED) == 0) {
                                        /* was already dequeued, so just bail on this one */
                                        return EJUSTRETURN;
@@ -1724,21 +1732,26 @@ knote_process(struct knote      *kn,
 
        if (result == 0) {
                return EJUSTRETURN;
-       } else if (kn->kn_flags & EV_ONESHOT) {
+       } else if ((kn->kn_flags & EV_ONESHOT) != 0) {
                knote_deactivate(kn);
                if (kqlock2knotedrop(kq, kn)) {
                        kn->kn_fop->f_detach(kn);
                        knote_drop(kn, p);
                }
-       } else if (kn->kn_flags & (EV_CLEAR | EV_DISPATCH)) {
-               knote_deactivate(kn);
-               /* manually clear knotes who weren't 'touch'ed */
-               if ((touch == 0) && (kn->kn_flags & EV_CLEAR)) {
+       } else if ((kn->kn_flags & (EV_CLEAR | EV_DISPATCH)) != 0) {
+               if ((kn->kn_flags & EV_DISPATCH) != 0) {
+                       /* deactivate and disable all dispatch knotes */
+                       knote_deactivate(kn);
+                       kn->kn_status |= KN_DISABLED;
+               } else if (!touch || kn->kn_fflags == 0) {
+                       /* only deactivate if nothing since the touch */
+                       knote_deactivate(kn);
+               }
+               if (!touch && (kn->kn_flags & EV_CLEAR) != 0) {
+                       /* manually clear non-touch knotes */
                        kn->kn_data = 0;
                        kn->kn_fflags = 0;
                }
-               if (kn->kn_flags & EV_DISPATCH)
-                       kn->kn_status |= KN_DISABLED;
                kqunlock(kq);
        } else {
                /*
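
The net effect of the EV_DISPATCH changes is easiest to see from userspace: once delivered, a dispatch knote is deactivated and disabled until the caller re-enables it, while new activity is remembered. A small sanity-check program (error handling omitted for brevity):

    #include <sys/types.h>
    #include <sys/event.h>
    #include <sys/time.h>
    #include <stdio.h>
    #include <unistd.h>

    int
    main(void)
    {
            int kq = kqueue();
            int fds[2];
            struct kevent kev;
            struct timespec zero = { 0, 0 };

            pipe(fds);
            EV_SET(&kev, fds[0], EVFILT_READ, EV_ADD | EV_DISPATCH, 0, 0, NULL);
            kevent(kq, &kev, 1, NULL, 0, NULL);

            write(fds[1], "x", 1);
            kevent(kq, NULL, 0, &kev, 1, NULL);            /* delivered once */

            write(fds[1], "y", 1);
            int n = kevent(kq, NULL, 0, &kev, 1, &zero);
            printf("before EV_ENABLE: %d event(s)\n", n);  /* 0: knote disabled */

            EV_SET(&kev, fds[0], EVFILT_READ, EV_ENABLE, 0, 0, NULL);
            kevent(kq, &kev, 1, NULL, 0, NULL);
            n = kevent(kq, NULL, 0, &kev, 1, &zero);
            printf("after EV_ENABLE: %d event(s)\n", n);   /* 1 */
            return 0;
    }
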
index f5e141455921d02845e6ec2b974a49d3dc47977f..35e9a43a685b85dd66234d4bade9f89cb9a31172 100644 (file)
@@ -315,7 +315,7 @@ sysctl_handle_kern_memorystatus_priority_list(__unused struct sysctl_oid *oid, _
 #if DEBUG 
                printf("set jetsam priority pids = { ");
                for (i = 0; i < jetsam_priority_list_count; i++) {
-                       printf("%d ", temp_list[i].pid);
+                       printf("(%d, 0x%08x, %d) ", temp_list[i].pid, temp_list[i].flags, temp_list[i].hiwat_pages);
                }
                printf("}\n");
 #endif /* DEBUG */
@@ -326,6 +326,10 @@ sysctl_handle_kern_memorystatus_priority_list(__unused struct sysctl_oid *oid, _
                for (i = jetsam_priority_list_count; i < kMaxPriorityEntries; i++) {
                        jetsam_priority_list[i].pid = 0;
                        jetsam_priority_list[i].flags = 0;
+                       jetsam_priority_list[i].hiwat_pages = -1;
+                       jetsam_priority_list[i].hiwat_reserved1 = -1;
+                       jetsam_priority_list[i].hiwat_reserved2 = -1;
+                       jetsam_priority_list[i].hiwat_reserved3 = -1;
                }
                jetsam_priority_list_index = 0;
                lck_mtx_unlock(jetsam_list_mlock);
index 02166d578bc6aa04041b641746da756ed6be35bc..6da43d2fdb95e5cc796bd51a0b271e30971f87d4 100644 (file)
@@ -690,7 +690,7 @@ mprotect(__unused proc_t p, struct mprotect_args *uap, __unused int32_t *retval)
 
        user_addr = (mach_vm_offset_t) uap->addr;
        user_size = (mach_vm_size_t) uap->len;
-       prot = (vm_prot_t)(uap->prot & VM_PROT_ALL);
+       prot = (vm_prot_t)(uap->prot & (VM_PROT_ALL | VM_PROT_TRUSTED));
 
        if (user_addr & PAGE_MASK_64) {
                /* UNIX SPEC: user address is not page-aligned, return EINVAL */
@@ -728,6 +728,34 @@ mprotect(__unused proc_t p, struct mprotect_args *uap, __unused int32_t *retval)
        if (error)
                return (error);
 #endif
+
+       if(prot & VM_PROT_TRUSTED) {
+#if CONFIG_DYNAMIC_CODE_SIGNING
+               /* CODE SIGNING ENFORCEMENT - JIT support */
+               /* The special protection value VM_PROT_TRUSTED requests that we treat
+                * this page as if it had a valid code signature.
+                * If this is enabled, there MUST be a MAC policy implementing the 
+                * mac_proc_check_mprotect() hook above.  Otherwise, code signing will be
+                * compromised because the check would always succeed and thus any
+                * process could sign code dynamically. */
+               result = vm_map_sign(user_map, 
+                                    vm_map_trunc_page(user_addr), 
+                                    vm_map_round_page(user_addr+user_size));
+               switch (result) {
+                       case KERN_SUCCESS:
+                               break;
+                       case KERN_INVALID_ADDRESS:
+                               /* UNIX SPEC: for an invalid address range, return ENOMEM */
+                               return ENOMEM;
+                       default:
+                               return EINVAL;
+               }
+#else
+               return ENOTSUP;
+#endif
+       }
+       prot &= ~VM_PROT_TRUSTED;
+       
        result = mach_vm_protect(user_map, user_addr, user_size,
                                 FALSE, prot);
        switch (result) {
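
From userspace the new JIT path would be exercised through mprotect. A heavily hedged sketch: it assumes VM_PROT_TRUSTED is visible via <mach/vm_prot.h> and that the kernel was built with CONFIG_DYNAMIC_CODE_SIGNING and carries a MAC policy implementing mac_proc_check_mprotect(); on any other configuration, expect ENOTSUP:

    #include <sys/mman.h>
    #include <mach/vm_prot.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>

    int
    main(void)
    {
            size_t sz = (size_t)getpagesize();
            void *buf = valloc(sz);    /* page-aligned, as mprotect requires */

            /* ... emit machine code into buf here ... */

            /* ask the kernel to treat the range as validly code-signed */
            if (mprotect(buf, sz, PROT_READ | PROT_EXEC | VM_PROT_TRUSTED) != 0)
                    perror("mprotect(VM_PROT_TRUSTED)");
            free(buf);
            return 0;
    }
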
index b51c4ecbe35ae692fcbd3d52c6db058e8382134d..02b61872ab463d2ebf642bd66fac6081f56f90c7 100644 (file)
 
 int    donice(struct proc *curp, struct proc *chgp, int n);
 int    dosetrlimit(struct proc *p, u_int which, struct rlimit *limp);
+static void do_background_socket(struct proc *curp, thread_t thread, int priority);
 static int do_background_thread(struct proc *curp, int priority);
+static int do_background_task(struct proc *curp, int priority);
 
 rlim_t maxdmap = MAXDSIZ;      /* XXX */ 
 rlim_t maxsmap = MAXSSIZ - PAGE_SIZE;  /* XXX */ 
@@ -369,10 +371,35 @@ setpriority(struct proc *curp, struct setpriority_args *uap, __unused int32_t *r
                        return (EINVAL);
                }
                error = do_background_thread(curp, uap->prio);
+               (void) do_background_socket(curp, current_thread(), uap->prio);
                found++;
                break;
        }
 
+       case PRIO_DARWIN_PROCESS: {
+               if (uap->who == 0)
+                       p = curp;
+               else {
+                       p = proc_find(uap->who);
+                       if (p == 0)
+                               break;
+                       refheld = 1;
+               }
+
+               error = do_background_task(p, uap->prio);
+               (void) do_background_socket(p, NULL, uap->prio);
+               
+               proc_lock(p);
+               p->p_iopol_disk = (uap->prio == PRIO_DARWIN_BG ? 
+                               IOPOL_THROTTLE : IOPOL_DEFAULT); 
+               proc_unlock(p);
+
+               found++;
+               if (refheld != 0)
+                       proc_rele(p);
+               break;
+       }
+
        default:
                return (EINVAL);
        }
@@ -427,20 +454,93 @@ out:
        return (error);
 }
 
+static int
+do_background_task(struct proc *p, int priority)
+{
+       int error = 0;
+       task_category_policy_data_t info;
+
+       if (priority & PRIO_DARWIN_BG) { 
+               info.role = TASK_THROTTLE_APPLICATION;
+       } else {
+               info.role = TASK_DEFAULT_APPLICATION;
+       }
+
+       error = task_policy_set(p->task,
+                       TASK_CATEGORY_POLICY,
+                       (task_policy_t) &info,
+                       TASK_CATEGORY_POLICY_COUNT);
+       return (error);
+}
+
+static void 
+do_background_socket(struct proc *curp, thread_t thread, int priority)
+{
+       struct filedesc                     *fdp;
+       struct fileproc                     *fp;
+       int                                 i;
+
+       if (priority & PRIO_DARWIN_BG) {
+               /* enable network throttle process-wide (if no thread is specified) */
+               if (thread == NULL) {
+                       proc_fdlock(curp);
+                       fdp = curp->p_fd;
+
+                       for (i = 0; i < fdp->fd_nfiles; i++) {
+                               struct socket       *sockp;
+
+                               fp = fdp->fd_ofiles[i];
+                               if (fp == NULL || (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
+                                               fp->f_fglob->fg_type != DTYPE_SOCKET) {
+                                       continue;
+                               }
+                               sockp = (struct socket *)fp->f_fglob->fg_data;
+                               sockp->so_traffic_mgt_flags |= TRAFFIC_MGT_SO_BACKGROUND;
+                               sockp->so_background_thread = NULL;
+                       }
+                       proc_fdunlock(curp);
+               }
+
+       } else {
+               /* disable the networking I/O throttle.
+                * NOTE - It is a known limitation of the current design that we
+                * could potentially clear the TRAFFIC_MGT_SO_BACKGROUND bit for
+                * sockets created by other threads within this process.
+                */
+               proc_fdlock(curp);
+               fdp = curp->p_fd;
+               for ( i = 0; i < fdp->fd_nfiles; i++ ) {
+                       struct socket       *sockp;
+
+                       fp = fdp->fd_ofiles[ i ];
+                       if ( fp == NULL || (fdp->fd_ofileflags[ i ] & UF_RESERVED) != 0 ||
+                                       fp->f_fglob->fg_type != DTYPE_SOCKET ) {
+                               continue;
+                       }
+                       sockp = (struct socket *)fp->f_fglob->fg_data;
+                       /* skip if only clearing this thread's sockets */
+                       if ((thread) && (sockp->so_background_thread != thread)) {
+                               continue;
+                       }
+                       sockp->so_traffic_mgt_flags &= ~TRAFFIC_MGT_SO_BACKGROUND;
+                       sockp->so_background_thread = NULL;
+               }
+               proc_fdunlock(curp);
+       }
+}
+
+
 /*
  * do_background_thread
  * Returns:    0                       Success
  * XXX - todo - does this need a MACF hook?
  */
 static int
-do_background_thread(struct proc *curp, int priority)
+do_background_thread(struct proc *curp __unused, int priority)
 {
-       int                                                                     i;
        thread_t                                                        thread;
        struct uthread                                          *ut;
        thread_precedence_policy_data_t         policy;
-       struct filedesc                                         *fdp;
-       struct fileproc                                         *fp;
        
        thread = current_thread();
        ut = get_bsdthread_info(thread);
@@ -461,31 +561,6 @@ do_background_thread(struct proc *curp, int priority)
                thread_policy_set( thread, THREAD_PRECEDENCE_POLICY,
                                                   (thread_policy_t)&policy,
                                                   THREAD_PRECEDENCE_POLICY_COUNT );
-
-               /* disable networking IO throttle.
-                * NOTE - It is a known limitation of the current design that we 
-                * could potentially clear TRAFFIC_MGT_SO_BACKGROUND bit for 
-                * sockets created by other threads within this process.  
-                */
-               proc_fdlock(curp);
-               fdp = curp->p_fd;
-               for ( i = 0; i < fdp->fd_nfiles; i++ ) {
-                       struct socket           *sockp;
-                       
-                       fp = fdp->fd_ofiles[ i ];
-                       if ( fp == NULL || (fdp->fd_ofileflags[ i ] & UF_RESERVED) != 0 || 
-                                fp->f_fglob->fg_type != DTYPE_SOCKET ) {
-                               continue;
-                       }
-                       sockp = (struct socket *)fp->f_fglob->fg_data;
-                       if ( sockp->so_background_thread != thread ) {
-                               continue;
-                       }
-                       sockp->so_traffic_mgt_flags &= ~TRAFFIC_MGT_SO_BACKGROUND;
-                       sockp->so_background_thread = NULL;
-               }
-               proc_fdunlock(curp);
-
                return(0);
        }
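
PRIO_DARWIN_PROCESS complements the existing per-thread PRIO_DARWIN_THREAD: it throttles the whole task (scheduler role via do_background_task, disk I/O policy, and every socket in the process via do_background_socket). A minimal userspace example using the constants declared in bsd/sys/resource.h:

    #include <sys/resource.h>
    #include <stdio.h>

    int
    main(void)
    {
            /* who == 0 targets the calling process */
            if (setpriority(PRIO_DARWIN_PROCESS, 0, PRIO_DARWIN_BG) != 0)
                    perror("setpriority(PRIO_DARWIN_BG)");

            /* ... perform bulk, low-priority work here ... */

            if (setpriority(PRIO_DARWIN_PROCESS, 0, 0) != 0)
                    perror("setpriority(restore)");
            return 0;
    }
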
        
index 7303287c109272dfbb47ab896198fbe60360bd72..842a3e5720abbcca16c9ca4a9562fa15ae92f54d 100644 (file)
@@ -2365,6 +2365,47 @@ SYSCTL_PROC(_kern, KERN_NETBOOT, netboot,
                0, 0, sysctl_netboot, "I", "");
 #endif
 
+#ifdef CONFIG_IMGSRC_ACCESS
+static int
+sysctl_imgsrcdev 
+(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
+{
+       vfs_context_t ctx = vfs_context_current();
+       vnode_t devvp;
+       int result;
+
+       if (!vfs_context_issuser(ctx)) {
+               return EPERM;
+       }    
+
+       if (imgsrc_rootvnode == NULL) {
+               return ENOENT;
+       }    
+
+       result = vnode_getwithref(imgsrc_rootvnode);
+       if (result != 0) {
+               return result;
+       }
+       
+       devvp = vnode_mount(imgsrc_rootvnode)->mnt_devvp;
+       result = vnode_getwithref(devvp);
+       if (result != 0) {
+               goto out;
+       }
+
+       result = sysctl_io_number(req, vnode_specrdev(devvp), sizeof(dev_t), NULL, NULL);
+
+       vnode_put(devvp);
+out:
+       vnode_put(imgsrc_rootvnode);
+       return result;
+}
+
+SYSCTL_PROC(_kern, OID_AUTO, imgsrcdev,
+               CTLTYPE_INT | CTLFLAG_RD,
+               0, 0, sysctl_imgsrcdev, "I", ""); 
+#endif /* CONFIG_IMGSRC_ACCESS */
+
 static int
 sysctl_usrstack
 (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
@@ -2815,3 +2856,12 @@ SYSCTL_INT (_kern, OID_AUTO, stack_size,
 SYSCTL_INT (_kern, OID_AUTO, stack_depth_max,
            CTLFLAG_RD, (int *) &kernel_stack_depth_max, 0, "Max kernel stack depth at interrupt or context switch");
 
+/*
+ * enable backtraces for port allocations
+ */
+extern int ipc_portbt;
+
+SYSCTL_INT(_kern, OID_AUTO, ipc_portbt, 
+               CTLFLAG_RW | CTLFLAG_KERN, 
+               &ipc_portbt, 0, "");
+
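
A quick way to exercise the new kern.imgsrcdev sysctl from userspace (root only, per the EPERM check above; expect ENOENT unless the system booted from an image on a CONFIG_IMGSRC_ACCESS kernel):

    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <stdio.h>

    int
    main(void)
    {
            dev_t dev;
            size_t len = sizeof(dev);

            if (sysctlbyname("kern.imgsrcdev", &dev, &len, NULL, 0) != 0) {
                    perror("kern.imgsrcdev");
                    return 1;
            }
            printf("image source device: major %d, minor %d\n",
                   major(dev), minor(dev));
            return 0;
    }
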
index 8c58b3ece4e5fd1c3597a341e4077807c21e4eb1..df178d791a3cd693602f1ab26ae5d9f2b0bf6b3f 100644 (file)
@@ -136,6 +136,7 @@ static boolean_t workqueue_run_nextitem(proc_t p, struct workqueue *wq, thread_t
 static void wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl,
                       int reuse_thread, int wake_thread, int return_directly);
 static void wq_unpark_continue(void);
+static void wq_unsuspend_continue(void);
 static int setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct threadlist *tl);
 static boolean_t workqueue_addnewthread(struct workqueue *wq);
 static void workqueue_removethread(struct threadlist *tl);
@@ -446,7 +447,6 @@ bsdthread_register(struct proc *p, struct bsdthread_register_args  *uap, __unuse
        return(0);
 }
 
-
 uint32_t wq_yielded_threshold          = WQ_YIELDED_THRESHOLD;
 uint32_t wq_yielded_window_usecs       = WQ_YIELDED_WINDOW_USECS;
 uint32_t wq_stalled_window_usecs       = WQ_STALLED_WINDOW_USECS;
@@ -903,15 +903,11 @@ workqueue_callback(int type, thread_t thread)
                 * the thread lock for the thread being UNBLOCKED
                 * is also held
                 */
-               if (tl->th_suspended) {
-                       OSAddAtomic(-1, &tl->th_suspended);
-                       KERNEL_DEBUG1(0xefffd024, wq, wq->wq_threads_scheduled, tl->th_priority, tl->th_affinity_tag, thread_tid(thread));
-               } else {
-                       OSAddAtomic(1, &wq->wq_thactive_count[tl->th_priority][tl->th_affinity_tag]);
+                OSAddAtomic(1, &wq->wq_thactive_count[tl->th_priority][tl->th_affinity_tag]);
 
-                       KERNEL_DEBUG1(0xefffd020 | DBG_FUNC_END, wq, wq->wq_threads_scheduled, tl->th_priority, tl->th_affinity_tag, thread_tid(thread));
-               }
-               break;
+                KERNEL_DEBUG1(0xefffd020 | DBG_FUNC_END, wq, wq->wq_threads_scheduled, tl->th_priority, tl->th_affinity_tag, thread_tid(thread));
+
+                break;
        }
 }
 
@@ -986,7 +982,7 @@ workqueue_addnewthread(struct workqueue *wq)
        p = wq->wq_proc;
        workqueue_unlock(p);
 
-       kret = thread_create_workq(wq->wq_task, &th);
+       kret = thread_create_workq(wq->wq_task, (thread_continue_t)wq_unsuspend_continue, &th);
 
        if (kret != KERN_SUCCESS)
                goto failed;
@@ -1046,7 +1042,6 @@ workqueue_addnewthread(struct workqueue *wq)
        tl->th_affinity_tag = -1;
        tl->th_priority = WORKQUEUE_NUMPRIOS;
        tl->th_policy = -1;
-       tl->th_suspended = 1;
 
 #if defined(__ppc__)
        //ml_fp_setvalid(FALSE);
@@ -1057,7 +1052,7 @@ workqueue_addnewthread(struct workqueue *wq)
        uth->uu_threadlist = (void *)tl;
 
         workqueue_lock_spin(p);
-
+       
        TAILQ_INSERT_TAIL(&wq->wq_thidlelist, tl, th_entry);
 
        wq->wq_thidlecount++;
@@ -1306,7 +1301,6 @@ workq_kernreturn(struct proc *p, struct workq_kernreturn_args  *uap, __unused in
 
 }
 
-
 void
 workqueue_exit(struct proc *p)
 {
@@ -1457,9 +1451,6 @@ workqueue_removeitem(struct workqueue *wq, int prio, user_addr_t item)
        return (error);
 }
 
-
-
-
 static int workqueue_importance[WORKQUEUE_NUMPRIOS] = 
 {
        2, 0, -2,
@@ -1710,14 +1701,11 @@ grab_idle_thread:
                tl->th_flags &= ~TH_LIST_SUSPENDED;
                reuse_thread = 0;
 
-               thread_sched_call(tl->th_thread, workqueue_callback);
-
        } else if ((tl->th_flags & TH_LIST_BLOCKED) == TH_LIST_BLOCKED) {
                tl->th_flags &= ~TH_LIST_BLOCKED;
-               tl->th_flags |= TH_LIST_BUSY;
                wake_thread = 1;
        }
-       tl->th_flags |= TH_LIST_RUNNING;
+       tl->th_flags |= TH_LIST_RUNNING | TH_LIST_BUSY;
 
        wq->wq_threads_scheduled++;
        wq->wq_thscheduled_count[priority][affinity_tag]++;
@@ -1894,6 +1882,80 @@ parkit:
 }
 
 
+static void
+wq_unsuspend_continue(void)
+{
+       struct uthread *uth = NULL;
+       thread_t th_to_unsuspend;
+       struct threadlist *tl;
+       proc_t  p;
+
+       th_to_unsuspend = current_thread();
+       uth = get_bsdthread_info(th_to_unsuspend);
+
+       if (uth != NULL && (tl = uth->uu_threadlist) != NULL) {
+               
+               if ((tl->th_flags & (TH_LIST_RUNNING | TH_LIST_BUSY)) == TH_LIST_RUNNING) {
+                       /*
+                        * most likely a normal resume of this thread occurred...
+                        * it's also possible that the thread was aborted after we
+                        * finished setting it up so that it could be dispatched... if
+                        * so, thread_bootstrap_return will notice the abort and put
+                        * the thread on the path to self-destruction
+                        */
+normal_resume_to_user:
+                       thread_sched_call(th_to_unsuspend, workqueue_callback);
+
+                       thread_bootstrap_return();
+               }
+               /*
+                * if we get here, it's because we've been resumed due to
+                * an abort of this thread (process is crashing)
+                */
+               p = current_proc();
+
+               workqueue_lock_spin(p);
+
+               if (tl->th_flags & TH_LIST_SUSPENDED) {
+                       /*
+                        * thread has been aborted while still on our idle
+                        * queue... remove it from our domain...
+                        * workqueue_removethread consumes the lock
+                        */
+                       workqueue_removethread(tl);
+
+                       thread_bootstrap_return();
+               }
+               while ((tl->th_flags & TH_LIST_BUSY)) {
+                       /*
+                        * this thread was aborted after we started making
+                        * it runnable, but before we finished dispatching it...
+                        * we need to wait for that process to finish,
+                        * and we need to ask for a wakeup instead of a
+                        * thread_resume since the abort has already resumed us
+                        */
+                       tl->th_flags |= TH_LIST_NEED_WAKEUP;
+
+                       assert_wait((caddr_t)tl, (THREAD_UNINT));
+
+                       workqueue_unlock(p);
+
+                       thread_block(THREAD_CONTINUE_NULL);
+
+                       workqueue_lock_spin(p);
+               }
+               workqueue_unlock(p);
+               /*
+                * we have finished setting up the thread's context...
+                * thread_bootstrap_return will take us through the abort path
+                * where the thread will self destruct
+                */
+               goto normal_resume_to_user;
+       }
+       thread_bootstrap_return();
+}
+
+
 static void
 wq_unpark_continue(void)
 {
@@ -1996,11 +2058,19 @@ wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl,
        } else {
                KERNEL_DEBUG1(0xefffd014 | DBG_FUNC_END, tl->th_workq, 0, 0, thread_tid(current_thread()), thread_tid(th));
 
-               thread_resume(th);
+               workqueue_lock_spin(p);
+
+               if (tl->th_flags & TH_LIST_NEED_WAKEUP)
+                       wakeup(tl);
+               else
+                       thread_resume(th);
+
+               tl->th_flags &= ~(TH_LIST_BUSY | TH_LIST_NEED_WAKEUP);
+               
+               workqueue_unlock(p);
        }
 }
 
-
 int
 setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct threadlist *tl)
 {
index 3da6c6d55bfa076b1c217afa2272ac0952cf4845..11a276bbd86f328a76b707771392698225a7c4f7 100644 (file)
@@ -239,8 +239,7 @@ pread_nocancel(struct proc *p, struct pread_nocancel_args *uap, user_ssize_t *re
        
        donefileread(p, fp, fd);
 
-       if (!error)
-           KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pread) | DBG_FUNC_NONE),
+       KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pread) | DBG_FUNC_NONE),
              uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);
 
 out:
@@ -531,8 +530,7 @@ errout:
        else
                fp_drop(p, fd, fp, 0);
 
-       if (!error)
-           KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pwrite) | DBG_FUNC_NONE),
+       KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pwrite) | DBG_FUNC_NONE),
              uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);
        
         return(error);
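Both hunks above drop the if (!error) guard, so the DBG_BSD_SC_EXTENDED_INFO tracepoints for pread/pwrite now fire whether or not the I/O succeeded. Note how the 64-bit file offset travels as two 32-bit kdebug arguments; a small sketch of that packing, and of how a trace consumer would reassemble it:

#include <stdint.h>

/* split a 64-bit offset into the two 32-bit trace arguments */
static inline void
split_offset(int64_t off, uint32_t *hi, uint32_t *lo)
{
        *hi = (uint32_t)((uint64_t)off >> 32);
        *lo = (uint32_t)off;
}

/* reassemble on the consumer side */
static inline int64_t
join_offset(uint32_t hi, uint32_t lo)
{
        return (int64_t)(((uint64_t)hi << 32) | lo);
}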
index d82fc7f835a288b50e115185c1f603654b3fc0af..5ab2dd50b03eacfcfcc57e25f4fb373a5009e309 100644 (file)
 362    AUE_KQUEUE      ALL     { int kqueue(void); } 
 363    AUE_NULL        ALL     { int kevent(int fd, const struct kevent *changelist, int nchanges, struct kevent *eventlist, int nevents, const struct timespec *timeout); } 
 364    AUE_LCHOWN      ALL     { int lchown(user_addr_t path, uid_t owner, gid_t group); }
-365    AUE_STACKSNAPSHOT       ALL     { int stack_snapshot(pid_t pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t options) NO_SYSCALL_STUB; }
+365    AUE_STACKSNAPSHOT       ALL     { int stack_snapshot(pid_t pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, uint32_t dispatch_offset) NO_SYSCALL_STUB; }
 #if CONFIG_WORKQUEUE
 366    AUE_NULL        ALL     { int bsdthread_register(user_addr_t threadstart, user_addr_t wqthread, int pthsize,user_addr_t dummy_value, user_addr_t targetconc_ptr, uint64_t dispatchqueue_offset) NO_SYSCALL_STUB; } 
 367    AUE_WORKQOPEN   ALL     { int workq_open(void) NO_SYSCALL_STUB; }
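The syscalls.master change above grows stack_snapshot from four arguments to five, renaming options to flags and adding dispatch_offset. Since the entry is marked NO_SYSCALL_STUB, libc generates no wrapper; a hypothetical caller of this private interface would go through syscall(2) with the trap number from the table (365), roughly as sketched below. Nothing here is a supported API.

#include <unistd.h>
#include <stdint.h>
#include <sys/types.h>

/* hypothetical wrapper; prototype mirrors the revised table entry,
 * with a plain pointer standing in for user_addr_t */
static int
stack_snapshot(pid_t pid, void *tracebuf, uint32_t tracebuf_size,
    uint32_t flags, uint32_t dispatch_offset)
{
        return syscall(365, pid, tracebuf, tracebuf_size, flags,
            dispatch_offset);
}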
index 26f38c8f5238edce4f38790a7bf02fbd551ed2b7..202f2d8588dcda57dd3388049473c3e799ec9ef7 100644 (file)
@@ -780,6 +780,8 @@ unp_attach(struct socket *so)
 static void
 unp_detach(struct unpcb *unp)
 {
+       int so_locked = 1;
+
        lck_rw_lock_exclusive(unp_list_mtx);
        LIST_REMOVE(unp, unp_link);
        lck_rw_done(unp_list_mtx);
@@ -805,13 +807,46 @@ unp_detach(struct unpcb *unp)
        if (unp->unp_conn)
                unp_disconnect(unp);
        while (unp->unp_refs.lh_first) {
-               struct unpcb *unp2 = unp->unp_refs.lh_first;
-               socket_unlock(unp->unp_socket, 0);
-               socket_lock(unp2->unp_socket, 1);
-               unp_drop(unp2, ECONNRESET);
-               socket_unlock(unp2->unp_socket, 1);
+               struct unpcb *unp2 = NULL;
+
+               /* This datagram socket is connected to one or more
+                * sockets. In order to avoid a race condition between removing
+                * this reference and closing the connected socket, we need 
+                * to check disconnect_in_progress
+                */
+               if (so_locked == 1) {
+                       socket_unlock(unp->unp_socket, 0);
+                       so_locked = 0;
+               }
+               lck_mtx_lock(unp_disconnect_lock);
+               while (disconnect_in_progress != 0) {
+                       (void)msleep((caddr_t)&disconnect_in_progress, unp_disconnect_lock,
+                               PSOCK, "disconnect", NULL);
+               }
+               disconnect_in_progress = 1;
+               lck_mtx_unlock(unp_disconnect_lock);
+
+               /* Now we are sure that no unpcb socket disconnect is in progress */
+               if (unp->unp_refs.lh_first != NULL) {
+                       unp2 = unp->unp_refs.lh_first;
+                       socket_lock(unp2->unp_socket, 1);
+               }
+               
+               lck_mtx_lock(unp_disconnect_lock);
+               disconnect_in_progress = 0;
+               wakeup(&disconnect_in_progress);
+               lck_mtx_unlock(unp_disconnect_lock);
+                       
+               if (unp2 != NULL) {
+                       /* We already locked this socket and have a reference on it */
+                       unp_drop(unp2, ECONNRESET);
+                       socket_unlock(unp2->unp_socket, 1);
+               }
+       }
+
+       if (so_locked == 0) {
                socket_lock(unp->unp_socket, 0);
+               so_locked = 1;
        }
        soisdisconnected(unp->unp_socket);
        /* makes sure we're getting dealloced */
@@ -1160,9 +1195,7 @@ unp_connect2(struct socket *so, struct socket *so2)
        switch (so->so_type) {
 
        case SOCK_DGRAM:
-               lck_rw_lock_exclusive(unp_list_mtx);
                LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
-               lck_rw_done(unp_list_mtx);
 
                
                /* Avoid lock order reversals due to drop/acquire in soisconnected. */
@@ -1292,9 +1325,7 @@ try_again:
        switch (unp->unp_socket->so_type) {
 
        case SOCK_DGRAM:
-               lck_rw_lock_exclusive(unp_list_mtx);
                LIST_REMOVE(unp, unp_reflink);
-               lck_rw_done(unp_list_mtx);
                unp->unp_socket->so_state &= ~SS_ISCONNECTED;
                socket_unlock(so2, 1);
                break;
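The unp_detach() rework above cannot hold this socket's lock while locking a peer, so it serializes the reference walk behind a global disconnect_in_progress flag: contenders msleep() on the flag's address and the holder wakeup()s it when done. The same gate, restated as a runnable user-space sketch with a mutex/condvar in place of msleep()/wakeup(); names are invented:

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t gate_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  gate_cv   = PTHREAD_COND_INITIALIZER;
static bool in_progress;        /* plays the role of disconnect_in_progress */

static void
gate_enter(void)
{
        pthread_mutex_lock(&gate_lock);
        while (in_progress)                     /* cf. the msleep() loop */
                pthread_cond_wait(&gate_cv, &gate_lock);
        in_progress = true;
        pthread_mutex_unlock(&gate_lock);
}

static void
gate_leave(void)
{
        pthread_mutex_lock(&gate_lock);
        in_progress = false;
        pthread_cond_broadcast(&gate_cv);       /* cf. wakeup(&disconnect_in_progress) */
        pthread_mutex_unlock(&gate_lock);
}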
index 920fbe064e55efe297a3f4b3b5e95876b073f3d5..1ea89d1cc46d125fe1650e4a6befad524bdf9906 100644 (file)
@@ -34,7 +34,8 @@ KERNELFILES= \
 PRIVATE_DATAFILES = \
        if_atm.h if_vlan_var.h if_ppp.h firewire.h \
        ppp_defs.h radix.h if_bond_var.h lacp.h ndrv_var.h \
-       raw_cb.h etherdefs.h iso88025.h if_pflog.h pfvar.h
+       raw_cb.h etherdefs.h iso88025.h if_pflog.h pfvar.h \
+       if_bridgevar.h
 
 PRIVATE_KERNELFILES = ${KERNELFILES} \
        bpfdesc.h dlil_pvt.h ppp_comp.h \
diff --git a/bsd/net/bridge.c b/bsd/net/bridge.c
deleted file mode 100644 (file)
index 01d3cb7..0000000
+++ /dev/null
@@ -1,906 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * Copyright (c) 1998 Luigi Rizzo
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/net/bridge.c,v 1.16.2.14 2001/02/09 23:13:41 luigi Exp $
- */
-
-/*
- * This code implements bridging in FreeBSD. It only acts on Ethernet-type
- * interfaces (others are still usable for routing).
- * A bridging table holds the source MAC address/dest. interface for each
- * known node. The table is indexed using a hash of the source address.
- *
- * Input packets are tapped near the beginning of ether_input(), and
- * analysed by calling bridge_in(). Depending on the result, the packet
- * can be forwarded to one or more output interfaces using bdg_forward(),
- * and/or sent to the upper layer (e.g. in case of multicast).
- *
- * Output packets are intercepted near the end of ether_output(),
- * the correct destination is selected calling bridge_dst_lookup(),
- * and then forwarding is done using bdg_forward().
- * Bridging is controlled by the sysctl variable net.link.ether.bridge
- *
- * The arp code is also modified to let a machine answer to requests
- * irrespective of the port the request came from.
- *
- * In case of loops in the bridging topology, the bridge detects this
- * event and temporarily mutes output bridging on one of the ports.
- * Periodically, interfaces are unmuted by bdg_timeout().
- * Muting is only implemented as a safety measure, and also as
- * a mechanism to support a user-space implementation of the spanning
- * tree algorithm. In the final release, unmuting will only occur
- * because of explicit action of the user-level daemon.
- *
- * To build a bridging kernel, use the following option
- *    option BRIDGE
- * and then at runtime set the sysctl variable to enable bridging.
- *
- * Only one interface is supposed to have addresses set (but
- * there are no problems in practice if you set addresses for more
- * than one interface).
- * Bridging will act before routing, but nothing prevents a machine
- * from doing both (modulo bugs in the implementation...).
- *
- * THINGS TO REMEMBER
- *  - bridging is incompatible with multicast routing on the same
- *    machine. There is not an easy fix to this.
- *  - loop detection is still not very robust.
- *  - the interface of bdg_forward() could be improved.
- */
-
-#include <sys/param.h>
-#include <sys/mbuf.h>
-#include <sys/malloc.h>
-#include <sys/systm.h>
-#include <sys/socket.h> /* for net/if.h */
-#include <sys/kernel.h>
-#include <sys/sysctl.h>
-
-#include <net/if.h>
-#include <net/if_types.h>
-
-#include <netinet/in.h> /* for struct arpcom */
-#include <netinet/in_systm.h>
-#include <netinet/in_var.h>
-#include <netinet/ip.h>
-#include <netinet/if_ether.h> /* for struct arpcom */
-
-#include "opt_ipfw.h" 
-#include "opt_ipdn.h" 
-
-#if defined(IPFIREWALL)
-#include <net/route.h>
-#include <netinet/ip_fw.h>
-#if defined(DUMMYNET)
-#include <netinet/ip_dummynet.h>
-#endif
-#endif
-
-#include <net/bridge.h>
-
-/*
- * For debugging, you can use the following macros.
- * remember, rdtsc() only works on Pentium-class machines
-
-    quad_t ticks;
-    DDB(ticks = rdtsc();)
-    ... interesting code ...
-    DDB(bdg_fw_ticks += (u_int32_t)(rdtsc() - ticks) ; bdg_fw_count++ ;)
-
- *
- */
-
-#define DDB(x) x
-#define DEB(x)
-
-static void bdginit(void *);
-static void bdgtakeifaces(void);
-static void flush_table(void);
-static void bdg_promisc_on(void);
-static void parse_bdg_cfg(void);
-static void bdg_init2(void);
-
-static int bdg_ipfw = 0 ;
-int do_bridge = 0;
-bdg_hash_table *bdg_table = NULL ;
-
-/*
- * System initialization
- */
-
-SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, bdginit, NULL)
-
-static struct bdg_stats bdg_stats ;
-struct bdg_softc *ifp2sc = NULL ;
-/* XXX make it static of size BDG_MAX_PORTS */
-
-#define        IFP_CHK(ifp, x) \
-       if (ifp2sc[ifp->if_index].magic != 0xDEADBEEF) { x ; }
-
-/*
- * turn off promisc mode, optionally clear the IFF_USED flag.
- * The flag is turned on by parse_bdg_config
- */
-static void
-bdg_promisc_off(int clear_used)
-{
-    struct ifnet *ifp ;
-    ifnet_head_lock_shared();
-    TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
-               if ( (ifp2sc[ifp->if_index].flags & IFF_BDG_PROMISC) ) {
-                       int s, ret ;
-                       s = splimp();
-                       ret = ifnet_set_promiscuous(ifp, 0);
-                       splx(s);
-                       ifp2sc[ifp->if_index].flags &= ~(IFF_BDG_PROMISC|IFF_MUTE) ;
-                       DEB(printf(">> now %s%d promisc OFF if_flags 0x%x bdg_flags 0x%x\n",
-                               ifp->if_name, ifp->if_unit,
-                               ifp->if_flags, ifp2sc[ifp->if_index].flags);)
-               }
-               if (clear_used) {
-                       ifp2sc[ifp->if_index].flags &= ~(IFF_USED) ;
-                       bdg_stats.s[ifp->if_index].name[0] = '\0';
-               }
-    }
-    ifnet_head_done();
-}
-
-/*
- * set promisc mode on the interfaces we use.
- */
-static void
-bdg_promisc_on()
-{
-    struct ifnet *ifp ;
-    int s ;
-
-    ifnet_head_lock_shared();
-    TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
-               if ( !BDG_USED(ifp) )
-                       continue ;
-               if ( 0 == ( ifp->if_flags & IFF_UP) ) {
-                       s = splimp();
-                       if_up(ifp);
-                       splx(s);
-               }
-               if ( !(ifp2sc[ifp->if_index].flags & IFF_BDG_PROMISC) ) {
-                       int ret ;
-                       s = splimp();
-                       ret = ifnet_set_promiscuous(ifp, 1);
-                       splx(s);
-                       ifp2sc[ifp->if_index].flags |= IFF_BDG_PROMISC ;
-                       printf(">> now %s%d promisc ON if_flags 0x%x bdg_flags 0x%x\n",
-                               ifp->if_name, ifp->if_unit,
-                               ifp->if_flags, ifp2sc[ifp->if_index].flags);
-               }
-               if (BDG_MUTED(ifp)) {
-                       printf(">> unmuting %s%d\n", ifp->if_name, ifp->if_unit);
-                       BDG_UNMUTE(ifp) ;
-               }
-    }
-    ifnet_head_done();
-}
-
-static int
-sysctl_bdg(SYSCTL_HANDLER_ARGS)
-{
-    int error, oldval = do_bridge ;
-
-    error = sysctl_handle_int(oidp,
-       oidp->oid_arg1, oidp->oid_arg2, req);
-    DEB( printf("called sysctl for bridge name %s arg2 %d val %d->%d\n",
-       oidp->oid_name, oidp->oid_arg2,
-       oldval, do_bridge); )
-
-    if (bdg_table == NULL)
-       do_bridge = 0 ;
-    if (oldval != do_bridge) {
-       bdg_promisc_off( 1 ); /* reset previously used interfaces */
-       flush_table();
-       if (do_bridge) {
-           parse_bdg_cfg();
-           bdg_promisc_on();
-       }
-    }
-    return error ;
-}
-
-static char bridge_cfg[256] = { "" } ;
-
-/*
- * parse the config string, set IFF_USED, name and cluster_id
- * for all interfaces found.
- */
-static void
-parse_bdg_cfg()
-{
-    char *p, *beg ;
-    int i, l, cluster;
-    struct bdg_softc *b;
-
-    for (p= bridge_cfg; *p ; p++) {
-       /* interface names begin with [a-z]  and continue up to ':' */
-       if (*p < 'a' || *p > 'z')
-           continue ;
-       for ( beg = p ; *p && *p != ':' ; p++ )
-           ;
-       if (*p == 0) /* end of string, ':' not found */
-           return ;
-       l = p - beg ; /* length of name string */
-       p++ ;
-       DEB(printf("-- match beg(%d) <%s> p <%s>\n", l, beg, p);)
-       for (cluster = 0 ; *p && *p >= '0' && *p <= '9' ; p++)
-           cluster = cluster*10 + (*p -'0');
-       /*
-        * now search in bridge strings
-        */
-       for (i=0, b = ifp2sc ; i < if_index ; i++, b++) {
-           char buf[32];
-           struct ifnet *ifp = b->ifp ;
-
-           if (ifp == NULL)
-               continue;
-           sprintf(buf, "%s%d", ifp->if_name, ifp->if_unit);
-           if (!strncmp(beg, buf, l)) { /* XXX not correct for >10 if! */
-               b->cluster_id = htons(cluster) ;
-               b->flags |= IFF_USED ;
-               sprintf(bdg_stats.s[ifp->if_index].name,
-                       "%s%d:%d", ifp->if_name, ifp->if_unit, cluster);
-
-               DEB(printf("--++  found %s\n",
-                   bdg_stats.s[ifp->if_index].name);)
-               break ;
-           }
-       }
-       if (*p == '\0')
-           break ;
-    }
-}
-
-static int
-sysctl_bdg_cfg(SYSCTL_HANDLER_ARGS)
-{
-    int error = 0 ;
-    char oldval[256] ;
-
-    strlcpy(oldval, bridge_cfg, sizeof (oldval));
-
-    error = sysctl_handle_string(oidp,
-           bridge_cfg, oidp->oid_arg2, req);
-    DEB(
-       printf("called sysctl for bridge name %s arg2 %d err %d val %s->%s\n",
-               oidp->oid_name, oidp->oid_arg2,
-               error,
-               oldval, bridge_cfg);
-       )
-    if (strcmp(oldval, bridge_cfg)) {
-       bdg_promisc_off( 1 );  /* reset previously-used interfaces */
-       flush_table();
-       parse_bdg_cfg();        /* and set new ones... */
-       if (do_bridge)
-           bdg_promisc_on();   /* re-enable interfaces */
-    }
-    return error ;
-}
-
-static int
-sysctl_refresh(SYSCTL_HANDLER_ARGS)
-{
-    if (req->newptr)
-           bdgtakeifaces();
-    
-    return 0;
-}
-
-
-SYSCTL_DECL(_net_link_ether);
-SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge_cfg, CTLTYPE_STRING|CTLFLAG_RW,
-           &bridge_cfg, sizeof(bridge_cfg), &sysctl_bdg_cfg, "A",
-           "Bridge configuration");
-
-SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge, CTLTYPE_INT|CTLFLAG_RW,
-           &do_bridge, 0, &sysctl_bdg, "I", "Bridging");
-
-SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw, CTLFLAG_RW,
-           &bdg_ipfw,0,"Pass bridged pkts through firewall");
-
-#define SY(parent, var, comment)                       \
-       static int var ;                                \
-       SYSCTL_INT(parent, OID_AUTO, var, CTLFLAG_RW, &(var), 0, comment);
-
-int bdg_ipfw_drops;
-SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw_drop,
-       CTLFLAG_RW, &bdg_ipfw_drops,0,"");
-
-int bdg_ipfw_colls;
-SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw_collisions,
-       CTLFLAG_RW, &bdg_ipfw_colls,0,"");
-
-SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge_refresh, CTLTYPE_INT|CTLFLAG_WR,
-           NULL, 0, &sysctl_refresh, "I", "iface refresh");
-
-#if 1 /* diagnostic vars */
-
-SY(_net_link_ether, verbose, "Be verbose");
-SY(_net_link_ether, bdg_split_pkts, "Packets split in bdg_forward");
-
-SY(_net_link_ether, bdg_thru, "Packets through bridge");
-
-SY(_net_link_ether, bdg_copied, "Packets copied in bdg_forward");
-
-SY(_net_link_ether, bdg_copy, "Force copy in bdg_forward");
-SY(_net_link_ether, bdg_predict, "Correctly predicted header location");
-
-SY(_net_link_ether, bdg_fw_avg, "Cycle counter avg");
-SY(_net_link_ether, bdg_fw_ticks, "Cycle counter item");
-SY(_net_link_ether, bdg_fw_count, "Cycle counter count");
-#endif
-
-SYSCTL_STRUCT(_net_link_ether, PF_BDG, bdgstats,
-        CTLFLAG_RD, &bdg_stats , bdg_stats, "bridge statistics");
-
-static int bdg_loops ;
-
-/*
- * completely flush the bridge table.
- */
-static void
-flush_table()
-{   
-    int s,i;
-
-    if (bdg_table == NULL)
-       return ;
-    s = splimp();
-    for (i=0; i< HASH_SIZE; i++)
-        bdg_table[i].name= NULL; /* clear table */
-    splx(s);
-}
-
-/*
- * called periodically to flush entries etc.
- */
-static void
-bdg_timeout(void *dummy)
-{
-    static int slowtimer = 0 ;
-    
-    if (bdg_inited == 0) {
-        bdg_init2();
-    } else if (do_bridge) {
-        static int age_index = 0 ; /* index of table position to age */
-        int l = age_index + HASH_SIZE/4 ;
-        /*
-         * age entries in the forwarding table.
-         */
-        if (l > HASH_SIZE)
-            l = HASH_SIZE ;
-        for (; age_index < l ; age_index++)
-            if (bdg_table[age_index].used)
-                bdg_table[age_index].used = 0 ;
-            else if (bdg_table[age_index].name) {
-                /* printf("xx flushing stale entry %d\n", age_index); */
-                bdg_table[age_index].name = NULL ;
-            }
-        if (age_index >= HASH_SIZE)
-            age_index = 0 ;
-        
-        if (--slowtimer <= 0 ) {
-            slowtimer = 5 ;
-            
-            bdg_promisc_on() ; /* we just need unmute, really */
-            bdg_loops = 0 ;
-        }
-    }
-    timeout(bdg_timeout, (void *)0, 2*hz );
-}
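bdg_timeout() above ages the table in quarter-table slices: a visit clears an entry's used bit, and an entry found with the bit still clear on the next visit is flushed, so an entry survives roughly one full sweep after its last packet. The aging step, distilled into a standalone sketch with invented names:

struct entry { void *name; unsigned short used; };

static void
age_slice(struct entry *tbl, int *idx, int tbl_size, int slice)
{
        int l = *idx + slice;

        if (l > tbl_size)
                l = tbl_size;
        for (; *idx < l; (*idx)++) {
                if (tbl[*idx].used)
                        tbl[*idx].used = 0;     /* give it one more sweep */
                else
                        tbl[*idx].name = NULL;  /* stale: flush */
        }
        if (*idx >= tbl_size)
                *idx = 0;                       /* wrap to the start */
}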
-
-/*
- * local MAC addresses are held in a small array. This makes comparisons
- * much faster.
- */
-bdg_addr bdg_addresses[BDG_MAX_PORTS];
-int bdg_ports ;
-
-/*
- * initialization of bridge code. This needs to be done after all
- * interfaces have been configured.
- */
-
-static int bdg_inited = 0;
-
-static void
-bdg_init2(void)
-{
-    if (bdg_inited != 0)
-        return;
-    
-    if (bdg_table == NULL) {
-        bdg_table = (struct hash_table *)
-            _MALLOC(HASH_SIZE * sizeof(struct hash_table),
-                    M_IFADDR, M_WAITOK);
-        if (bdg_table == NULL)
-            return;
-
-        flush_table();
-    }
-
-    if (ifp2sc == NULL) {
-        ifp2sc = _MALLOC(BDG_MAX_PORTS * sizeof(struct bdg_softc),
-                         M_IFADDR, M_WAITOK );
-        if (ifp2sc == NULL)
-            return;
-        
-        bzero(ifp2sc, BDG_MAX_PORTS * sizeof(struct bdg_softc) );
-        bdgtakeifaces();
-    }
-    
-    bdg_inited = 1;
-}
-
-static void
-bdginit(void *dummy)
-{
-    /* Initialize first what can't fail */
-    bzero(&bdg_stats, sizeof(bdg_stats) );
-    do_bridge=0;
-    
-    /* Attempt to initialize the rest and start the timer */
-    bdg_timeout(0);
-}
-    
-void
-bdgtakeifaces(void)
-{
-    int i ;
-    struct ifnet *ifp;
-    bdg_addr *p = bdg_addresses ;
-    struct bdg_softc *bp;
-
-    bdg_ports = 0 ;
-    *bridge_cfg = '\0';
-
-    printf("BRIDGE 010131, have %d interfaces\n", if_index);
-    ifnet_head_lock_shared();
-    for (i = 0 , ifp = ifnet_head.tqh_first ; i < if_index ;
-               i++, ifp = TAILQ_NEXT(ifp, if_link) )
-               if (ifp->if_type == IFT_ETHER) { /* ethernet ? */
-                       ifnet_lladdr_copy_bytes(ifp, p->etheraddr, ETHER_ADDR_LEN);
-                       bp = &ifp2sc[ifp->if_index] ;
-                       sprintf(bridge_cfg + strlen(bridge_cfg),
-                       "%s%d:1,", ifp->if_name, ifp->if_unit);
-                       printf("-- index %d %s type %d phy %d addrl %d addr %6D\n",
-                               ifp->if_index,
-                               bdg_stats.s[ifp->if_index].name,
-                               (int)ifp->if_type, (int) ifp->if_physical,
-                               (int)ifp->if_addrlen,
-                               p->etheraddr, "." );
-                       p++ ;
-                       bp->ifp = ifp ;
-                       bp->flags = IFF_USED ;
-                       bp->cluster_id = htons(1) ;
-                       bp->magic = 0xDEADBEEF ;
-       
-                       sprintf(bdg_stats.s[ifp->if_index].name,
-                       "%s%d:%d", ifp->if_name, ifp->if_unit,
-                       ntohs(bp->cluster_id));
-                       bdg_ports ++ ;
-               }
-       ifnet_head_done();
-}
-
-/*
- * bridge_in() is invoked to perform bridging decision on input packets.
- *
- * On Input:
- *   eh                Ethernet header of the incoming packet.
- *
- * On Return: destination of packet, one of
- *   BDG_BCAST broadcast
- *   BDG_MCAST  multicast
- *   BDG_LOCAL  is only for a local address (do not forward)
- *   BDG_DROP   drop the packet
- *   ifp       ifp of the destination interface.
- *
- * Forwarding is not done directly to give a chance to some drivers
- * to fetch more of the packet, or simply drop it completely.
- */
-
-struct ifnet *
-bridge_in(struct ifnet *ifp, struct ether_header *eh)
-{
-    int index;
-    struct ifnet *dst , *old ;
-    int dropit = BDG_MUTED(ifp) ;
-
-    /*
-     * hash the source address
-     */
-    index= HASH_FN(eh->ether_shost);
-    bdg_table[index].used = 1 ;
-    old = bdg_table[index].name ;
-    if ( old ) { /* the entry is valid. */
-       IFP_CHK(old, printf("bridge_in-- reading table\n") );
-
-        if (!BDG_MATCH( eh->ether_shost, bdg_table[index].etheraddr) ) {
-           bdg_ipfw_colls++ ;
-           bdg_table[index].name = NULL ;
-        } else if (old != ifp) {
-           /*
-            * found a loop. Either a machine has moved, or there
-            * is a misconfiguration/reconfiguration of the network.
-            * First, do not forward this packet!
-            * Record the relocation anyways; then, if loops persist,
-            * suspect a reconfiguration and disable forwarding
-            * from the old interface.
-            */
-           bdg_table[index].name = ifp ; /* relocate address */
-           printf("-- loop (%d) %6D to %s%d from %s%d (%s)\n",
-                       bdg_loops, eh->ether_shost, ".",
-                       ifp->if_name, ifp->if_unit,
-                       old->if_name, old->if_unit,
-                       BDG_MUTED(old) ? "muted":"active");
-           dropit = 1 ;
-           if ( !BDG_MUTED(old) ) {
-               if (++bdg_loops > 10)
-                   BDG_MUTE(old) ;
-           }
-        }
-    }
-
-    /*
-     * now write the source address into the table
-     */
-    if (bdg_table[index].name == NULL) {
-       DEB(printf("new addr %6D at %d for %s%d\n",
-           eh->ether_shost, ".", index, ifp->if_name, ifp->if_unit);)
-       bcopy(eh->ether_shost, bdg_table[index].etheraddr, 6);
-       bdg_table[index].name = ifp ;
-    }
-    dst = bridge_dst_lookup(eh);
-    /* Return values:
-     *   BDG_BCAST, BDG_MCAST, BDG_LOCAL, BDG_UNKNOWN, BDG_DROP, ifp.
-     * For muted interfaces, the first 3 are changed in BDG_LOCAL,
-     * and others to BDG_DROP. Also, for incoming packets, ifp is changed
-     * to BDG_DROP in case ifp == src . These mods are not necessary
-     * for outgoing packets from ether_output().
-     */
-    BDG_STAT(ifp, BDG_IN);
-    switch ((int)dst) {
-    case (int)BDG_BCAST:
-    case (int)BDG_MCAST:
-    case (int)BDG_LOCAL:
-    case (int)BDG_UNKNOWN:
-    case (int)BDG_DROP:
-       BDG_STAT(ifp, dst);
-       break ;
-    default :
-       if (dst == ifp || dropit )
-           BDG_STAT(ifp, BDG_DROP);
-       else
-           BDG_STAT(ifp, BDG_FORWARD);
-       break ;
-    }
-
-    if ( dropit ) {
-       if (dst == BDG_BCAST || dst == BDG_MCAST || dst == BDG_LOCAL)
-           return BDG_LOCAL ;
-       else
-           return BDG_DROP ;
-    } else {
-       return (dst == ifp ? BDG_DROP : dst ) ;
-    }
-}
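The learning half of bridge_in() above hashes the source MAC, marks the slot used, and binds it to the receiving interface, relocating the entry when a station moves; the loop-detection bookkeeping is omitted here. A distilled, endian-independent restatement of that step and of HASH_FN() from bridge.h:

#include <string.h>

#define HASH_SIZE 8192          /* must be a power of 2, as in bridge.h */

struct slot { void *ifp; unsigned char addr[6]; unsigned short used; };

/* same value as HASH_FN(): XOR of the last two 16-bit words */
static unsigned int
mac_hash(const unsigned char *a)
{
        return ((a[2] << 8 | a[3]) ^ (a[4] << 8 | a[5])) & (HASH_SIZE - 1);
}

static void
learn(struct slot *tbl, const unsigned char *shost, void *rcvif)
{
        struct slot *s = &tbl[mac_hash(shost)];

        s->used = 1;
        if (s->ifp == NULL || memcmp(s->addr, shost, 6) != 0) {
                memcpy(s->addr, shost, 6);      /* new or colliding entry */
                s->ifp = rcvif;
        } else if (s->ifp != rcvif) {
                s->ifp = rcvif;                 /* station moved: relocate */
        }
}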
-
-/*
- * Forward to dst, excluding src port and muted interfaces.
- * If src == NULL, the pkt comes from ether_output, and dst is the real
- * interface the packet is originally sent to. In this case we must forward
- * it to the whole cluster. We never call bdg_forward from ether_output on
- * interfaces which are not part of a cluster.
- *
- * The packet is freed if possible (i.e. surely not of interest for
- * the upper layer), otherwise a copy is left for use by the caller
- * (pointer in m0).
- *
- * It would be more efficient to make bdg_forward() always consume
- * the packet, leaving the caller to check whether it needs a copy and to
- * make one if so. As it is now, bdg_forward() can sometimes make
- * a copy when it is not necessary.
- *
- * XXX be careful about eh, it can be a pointer into *m
- */
-struct mbuf *
-bdg_forward(struct mbuf *m0, struct ether_header *const eh, struct ifnet *dst)
-{
-    struct ifnet *src = m0->m_pkthdr.rcvif; /* could be NULL in output */
-    struct ifnet *ifp, *last = NULL ;
-    int s ;
-    int shared = bdg_copy ; /* someone else is using the mbuf */
-    int once = 0;      /* loop only once */
-    struct ifnet *real_dst = dst ; /* real dst from ether_output */
-#ifdef IPFIREWALL
-    struct ip_fw_chain *rule = NULL ; /* did we match a firewall rule ? */
-#endif
-
-    /*
-     * XXX eh is usually a pointer within the mbuf (some ethernet drivers
-     * do that), so we better copy it before doing anything with the mbuf,
-     * or we might corrupt the header.
-     */
-    struct ether_header save_eh = *eh ;
-
-#if defined(IPFIREWALL) && defined(DUMMYNET)
-    if (m0->m_type == MT_DUMMYNET) {
-       /* extract info from dummynet header */
-       rule = (struct ip_fw_chain *)(m0->m_data) ;
-       m0 = m0->m_next ;
-       src = m0->m_pkthdr.rcvif;
-       shared = 0 ; /* For sure this is our own mbuf. */
-    } else
-#endif
-    bdg_thru++; /* only count once */
-
-    if (src == NULL) /* packet from ether_output */
-               dst = bridge_dst_lookup(eh);
-    if (dst == BDG_DROP) { /* this should not happen */
-               printf("xx bdg_forward for BDG_DROP\n");
-               m_freem(m0);
-               return NULL;
-    }
-    if (dst == BDG_LOCAL) { /* this should not happen as well */
-               printf("xx ouch, bdg_forward for local pkt\n");
-               return m0;
-    }
-    if (dst == BDG_BCAST || dst == BDG_MCAST || dst == BDG_UNKNOWN) {
-               ifp = ifnet_head.tqh_first ; /* scan all ports */
-               once = 0 ;
-               if (dst != BDG_UNKNOWN) /* need a copy for the local stack */
-                       shared = 1 ;
-    } else {
-               ifp = dst ;
-               once = 1 ;
-    }
-    if ( (u_int)(ifp) <= (u_int)BDG_FORWARD )
-               panic("bdg_forward: bad dst");
-
-#ifdef IPFIREWALL
-    /*
-     * Do filtering in a very similar way to what is done in ip_output.
-     * Only if firewall is loaded, enabled, and the packet is not
-     * from ether_output() (src==NULL, or we would filter it twice).
-     * Additional restrictions may apply e.g. non-IP, short packets,
-     * and pkts already gone through a pipe.
-     */
-    if (ip_fw_chk_ptr && bdg_ipfw != 0 && src != NULL) {
-       struct ip *ip ;
-       int i;
-
-       if (rule != NULL) /* dummynet packet, already partially processed */
-           goto forward; /* HACK! I should obey the fw_one_pass */
-       if (ntohs(save_eh.ether_type) != ETHERTYPE_IP)
-           goto forward ; /* not an IP packet, ipfw is not appropriate */
-       if (m0->m_pkthdr.len < sizeof(struct ip) )
-           goto forward ; /* header too short for an IP pkt, cannot filter */
-       /*
-        * i need some amt of data to be contiguous, and in case others need
-        * the packet (shared==1) also better be in the first mbuf.
-        */
-       i = min(m0->m_pkthdr.len, max_protohdr) ;
-       if ( shared || m0->m_len < i) {
-           m0 = m_pullup(m0, i) ;
-           if (m0 == NULL) {
-               printf("-- bdg: pullup failed.\n") ;
-               return NULL ;
-           }
-       }
-
-       /*
-        * before calling the firewall, swap fields the same as IP does.
-        * here we assume the pkt is an IP one and the header is contiguous
-        */
-       ip = mtod(m0, struct ip *);
-       NTOHS(ip->ip_len);
-       NTOHS(ip->ip_off);
-
-       /*
-        * The third parameter to the firewall code is the dst. interface.
-        * Since we apply checks only on input pkts we use NULL.
-        * The firewall knows this is a bridged packet as the cookie ptr
-        * is NULL.
-        */
-       i = (*ip_fw_chk_ptr)(&ip, 0, NULL, NULL /* cookie */, &m0, &rule, NULL);
-       if ( (i & IP_FW_PORT_DENY_FLAG) || m0 == NULL) /* drop */
-           return m0 ;
-       /*
-        * If we get here, the firewall has passed the pkt, but the mbuf
-        * pointer might have changed. Restore ip and the fields NTOHS()'d.
-        */
-       ip = mtod(m0, struct ip *);
-       HTONS(ip->ip_len);
-       HTONS(ip->ip_off);
-
-       if (i == 0) /* a PASS rule.  */
-           goto forward ;
-#ifdef DUMMYNET
-       if (i & IP_FW_PORT_DYNT_FLAG) {
-           /*
-            * Pass the pkt to dummynet, which consumes it.
-            * If shared, make a copy and keep the original.
-            * Need to prepend the ethernet header, optimize the common
-            * case of eh pointing already into the original mbuf.
-            */
-           struct mbuf *m ;
-           if (shared) {
-               m = m_copypacket(m0, M_DONTWAIT);
-               if (m == NULL) {
-                   printf("bdg_fwd: copy(1) failed\n");
-                   return m0;
-               }
-           } else {
-               m = m0 ; /* pass the original to dummynet */
-               m0 = NULL ; /* and nothing back to the caller */
-           }
-           if ( (void *)(eh + 1) == (void *)m->m_data) {
-               m->m_data -= ETHER_HDR_LEN ;
-               m->m_len += ETHER_HDR_LEN ;
-               m->m_pkthdr.len += ETHER_HDR_LEN ;
-               bdg_predict++;
-           } else {
-               M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT);
-               if (!m && verbose) printf("M_PREPEND failed\n");
-               if (m == NULL) /* nope... */
-                   return m0 ;
-               bcopy(&save_eh, mtod(m, struct ether_header *), ETHER_HDR_LEN);
-           }
-           dummynet_io((i & 0xffff),DN_TO_BDG_FWD,m,real_dst,NULL,0,rule,0);
-           return m0 ;
-       }
-#endif
-       /*
-        * XXX add divert/forward actions...
-        */
-       /* if none of the above matches, we have to drop the pkt */
-       bdg_ipfw_drops++ ;
-       printf("bdg_forward: No rules match, so dropping packet!\n");
-       return m0 ;
-    }
-forward:
-#endif /* IPFIREWALL */
-    /*
-     * Again, bring up the headers in case of shared bufs to avoid
-     * corruptions in the future.
-     */
-    if ( shared ) {
-        int i = min(m0->m_pkthdr.len, max_protohdr) ;
-
-       m0 = m_pullup(m0, i) ;
-       if (m0 == NULL) {
-           printf("-- bdg: pullup2 failed.\n") ;
-           return NULL ;
-       }
-    }
-    /* now real_dst is used to determine the cluster where to forward */
-    if (src != NULL) /* pkt comes from ether_input */
-       real_dst = src ;
-    for (;;) {
-       if (last) { /* need to forward packet leftover from previous loop */
-           struct mbuf *m ;
-           if (shared == 0 && once ) { /* no need to copy */
-               m = m0 ;
-               m0 = NULL ; /* original is gone */
-           } else {
-               m = m_copypacket(m0, M_DONTWAIT);
-               if (m == NULL) {
-                   printf("bdg_forward: sorry, m_copypacket failed!\n");
-                   return m0 ; /* the original is still there... */
-               }
-           }
-           /*
-            * Add header (optimized for the common case of eh pointing
-            * already into the mbuf) and execute last part of ether_output:
-            * queue pkt and start output if interface not yet active.
-            */
-           if ( (void *)(eh + 1) == (void *)m->m_data) {
-               m->m_data -= ETHER_HDR_LEN ;
-               m->m_len += ETHER_HDR_LEN ;
-               m->m_pkthdr.len += ETHER_HDR_LEN ;
-               bdg_predict++;
-           } else {
-               M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT);
-               if (!m && verbose) printf("M_PREPEND failed\n");
-               if (m == NULL)
-                   return m0;
-               bcopy(&save_eh, mtod(m, struct ether_header *), ETHER_HDR_LEN);
-           }
-           s = splimp();
-           if (IF_QFULL(&last->if_snd)) {
-               IF_DROP(&last->if_snd);
-#if 0
-               BDG_MUTE(last); /* should I also mute ? */
-#endif
-               splx(s);
-               m_freem(m); /* consume the pkt anyways */
-           } else {
-               last->if_obytes += m->m_pkthdr.len ;
-               if (m->m_flags & M_MCAST)
-                   last->if_omcasts++;
-               if (m->m_pkthdr.len != m->m_len) /* this pkt is on >1 bufs */
-                   bdg_split_pkts++;
-
-               IF_ENQUEUE(&last->if_snd, m);
-               if ((last->if_flags & IFF_OACTIVE) == 0)
-                   (*last->if_start)(last);
-               splx(s);
-           }
-           BDG_STAT(last, BDG_OUT);
-           last = NULL ;
-           if (once)
-               break ;
-       }
-       if (ifp == NULL)
-           break ;
-       /*
-        * If the interface is used for bridging, not muted, not full,
-        * up and running, is not the source interface, and belongs to
-        * the same cluster as the 'real_dst', then send here.
-        */
-       if ( BDG_USED(ifp) && !BDG_MUTED(ifp) && !IF_QFULL(&ifp->if_snd)  &&
-            (ifp->if_flags & (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING) &&
-            ifp != src && BDG_SAMECLUSTER(ifp, real_dst) )
-           last = ifp ;
-       ifp = TAILQ_NEXT(ifp, if_link) ;
-       if (ifp == NULL)
-           once = 1 ;
-    }
-    DEB(bdg_fw_ticks += (u_int32_t)(rdtsc() - ticks) ; bdg_fw_count++ ;
-       if (bdg_fw_count != 0) bdg_fw_avg = bdg_fw_ticks/bdg_fw_count; )
-    return m0 ;
-}
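As the header comment explains, bdg_forward() consumes the mbuf when it is surely of no further interest and otherwise leaves a copy for the caller. Caller-side, the contract looks roughly like this; local_input() is a hypothetical stand-in for the local-stack hand-off:

struct mbuf;
struct ether_header;
struct ifnet;

/* signature as declared in the deleted bridge.h */
struct mbuf *bdg_forward(struct mbuf *, struct ether_header *const, struct ifnet *);

static void local_input(struct mbuf *);  /* hypothetical local-stack hand-off */

static void
input_via_bridge(struct mbuf *m, struct ether_header *eh, struct ifnet *dst)
{
        m = bdg_forward(m, eh, dst);
        if (m == NULL)
                return;         /* consumed: forwarded and/or freed */
        local_input(m);         /* a copy was left for the local stack */
}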
diff --git a/bsd/net/bridge.h b/bsd/net/bridge.h
deleted file mode 100644 (file)
index faeff42..0000000
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * Copyright (c) 1998 Luigi Rizzo
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- */
-#ifndef _NET_BRIDGE_H_
-#define _NET_BRIDGE_H_
-#include <sys/appleapiopts.h>
-
-#warning This is not used by Darwin, do not include
-
-extern int do_bridge;
-/*
- * the hash table for bridge
- */
-typedef struct hash_table {
-    struct ifnet *name ;
-    unsigned char etheraddr[6] ;
-    unsigned short used ;
-} bdg_hash_table ;
-
-extern bdg_hash_table *bdg_table ;
-
-/*
- * We need additional info for the bridge. The bdg_ifp2sc[] array
- * provides a pointer to this struct using the if_index.   
- * bdg_softc has a backpointer to the struct ifnet, the bridge
- * flags, and a cluster (bridging occurs only between port of the
- * same cluster).
- */
-struct bdg_softc {
-    struct ifnet *ifp ;
-    /* also ((struct arpcom *)ifp)->ac_enaddr is the eth. addr */
-    int flags ;
-#define IFF_BDG_PROMISC 0x0001  /* set promisc mode on this if.  */
-#define IFF_MUTE        0x0002  /* mute this if for bridging.   */
-#define IFF_USED        0x0004  /* use this if for bridging.    */
-    short cluster_id ; /* in network format */
-    uint32_t magic;
-} ;
-
-extern struct bdg_softc *ifp2sc;
-
-#define BDG_USED(ifp) (ifp2sc[ifp->if_index].flags & IFF_USED)
-#define BDG_MUTED(ifp) (ifp2sc[ifp->if_index].flags & IFF_MUTE)
-#define BDG_MUTE(ifp) ifp2sc[ifp->if_index].flags |= IFF_MUTE
-#define BDG_UNMUTE(ifp) ifp2sc[ifp->if_index].flags &= ~IFF_MUTE
-#define BDG_CLUSTER(ifp) (ifp2sc[ifp->if_index].cluster_id)
-
-#define BDG_SAMECLUSTER(ifp,src) \
-       (src == NULL || BDG_CLUSTER(ifp) == BDG_CLUSTER(src) )
-
-
-#define BDG_MAX_PORTS 128
-typedef struct _bdg_addr {
-    unsigned char etheraddr[6] ;
-    short cluster_id ;
-} bdg_addr ;
-extern bdg_addr bdg_addresses[BDG_MAX_PORTS];
-extern int bdg_ports ;
-
-/*
- * out of the 6 bytes, the last ones are more "variable". Since
- * we are on a little endian machine, we have to do some gimmick...
- */
-#define HASH_SIZE 8192 /* must be a power of 2 */
-#define HASH_FN(addr)   (      \
-       ntohs( ((short *)addr)[1] ^ ((short *)addr)[2] ) & (HASH_SIZE -1))
-
-#define        IFF_MUTE        IFF_LINK2       /* will need a separate flag... */
-
-struct ifnet *bridge_in(struct ifnet *ifp, struct ether_header *eh);
-/* bdg_forward frees the mbuf if necessary, returning null */
-struct mbuf *bdg_forward(struct mbuf *m0, struct ether_header *eh, struct ifnet *dst);
-
-#ifdef __i386__
-#define BDG_MATCH(a,b) ( \
-    ((unsigned short *)(a))[2] == ((unsigned short *)(b))[2] && \
-    *((unsigned int *)(a)) == *((unsigned int *)(b)) )
-#define IS_ETHER_BROADCAST(a) ( \
-       *((unsigned int *)(a)) == 0xffffffff && \
-       ((unsigned short *)(a))[2] == 0xffff )
-#else
-#warning... must complete these for the alpha etc.
-#define BDG_MATCH(a,b) (!bcmp(a, b, ETHER_ADDR_LEN) )
-#endif
-/*
- * The following constants are not legal ifnet pointers, and are used
- * as return values from the classifier, bridge_dst_lookup()
- * The same values are used as index in the statistics arrays,
- * with BDG_FORWARD replacing specifically forwarded packets.
- */
-#define BDG_BCAST      ( (struct ifnet *)1 )
-#define BDG_MCAST      ( (struct ifnet *)2 )
-#define BDG_LOCAL      ( (struct ifnet *)3 )
-#define BDG_DROP       ( (struct ifnet *)4 )
-#define BDG_UNKNOWN    ( (struct ifnet *)5 )
-#define BDG_IN         ( (struct ifnet *)7 )
-#define BDG_OUT                ( (struct ifnet *)8 )
-#define BDG_FORWARD    ( (struct ifnet *)9 )
-
-#define PF_BDG 3 /* XXX superhack */
-/*
- * statistics, passed up with sysctl interface and ns -p bdg
- */
-
-#define STAT_MAX (int)BDG_FORWARD
-struct bdg_port_stat {
-    char name[16];
-    uint32_t collisions;
-    uint32_t p_in[STAT_MAX+1];
-} ;
-
-struct bdg_stats {
-    struct bdg_port_stat s[16];
-} ;
-
-
-#define BDG_STAT(ifp, type) bdg_stats.s[ifp->if_index].p_in[(int)type]++ 
-#ifdef KERNEL
-/*
- * Find the right pkt destination:
- *     BDG_BCAST       is a broadcast
- *     BDG_MCAST       is a multicast
- *     BDG_LOCAL       is for a local address
- *     BDG_DROP        must be dropped
- *     other           ifp of the dest. interface (incl.self)
- *
- * We assume this is only called for interfaces for which bridging
- * is enabled, i.e. BDG_USED(ifp) is true.
- */
-static __inline
-struct ifnet *
-bridge_dst_lookup(struct ether_header *eh)
-{
-    struct ifnet *dst ;
-    int index ;
-    bdg_addr *p ;
-
-    if (IS_ETHER_BROADCAST(eh->ether_dhost))
-       return BDG_BCAST ;
-    if (eh->ether_dhost[0] & 1)
-       return BDG_MCAST ;
-    /*
-     * Lookup local addresses in case one matches.
-     */
-    for (index = bdg_ports, p = bdg_addresses ; index ; index--, p++ )
-       if (BDG_MATCH(p->etheraddr, eh->ether_dhost) )
-           return BDG_LOCAL ;
-    /*
-     * Look for a possible destination in table
-     */
-    index= HASH_FN( eh->ether_dhost );
-    dst = bdg_table[index].name;
-    if ( dst && BDG_MATCH( bdg_table[index].etheraddr, eh->ether_dhost) )
-       return dst ;
-    else
-       return BDG_UNKNOWN ;
-}
-
-#endif /* KERNEL */
-
-#endif /* _NET_BRIDGE_H_ */
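BDG_BCAST through BDG_FORWARD above are small integers cast to struct ifnet *, so a classifier result is either a real interface pointer or a sentinel; that is why bdg_forward() sanity-checks (u_int)(ifp) <= (u_int)BDG_FORWARD. A sketch of that test, with uintptr_t as the portable spelling of the cast (the original predates 64-bit pointers):

#include <stdint.h>

#define BDG_SENTINEL_MAX 9      /* BDG_FORWARD, the largest sentinel */

/* nonzero when dst is a sentinel rather than a real ifnet pointer */
static int
is_sentinel(const void *dst)
{
        return (uintptr_t)dst <= (uintptr_t)BDG_SENTINEL_MAX;
}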
diff --git a/bsd/net/bridgestp.c b/bsd/net/bridgestp.c
new file mode 100644 (file)
index 0000000..1c89582
--- /dev/null
@@ -0,0 +1,1153 @@
+/*
+ * Copyright (c) 2007-2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*     $fpwf: Revision 1.2  2007/05/17 03:38:46  rnewberry Exp $       */
+/*     $NetBSD: bridgestp.c,v 1.10 2006/11/16 01:33:40 christos Exp $  */
+
+/*
+ * Copyright (c) 2000 Jason L. Wright (jason@thought.net)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by Jason L. Wright
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * OpenBSD: bridgestp.c,v 1.5 2001/03/22 03:48:29 jason Exp
+ */
+
+/*
+ * Implementation of the spanning tree protocol as defined in
+ * ISO/IEC Final DIS 15802-3 (IEEE P802.1D/D17), May 25, 1998.
+ * (In English: IEEE 802.1D, Draft 17, 1998)
+ */
+
+/*     $NetBSD: if_bridgevar.h,v 1.8 2005/12/10 23:21:38 elad Exp $    */
+
+#include <sys/cdefs.h>
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/kernel.h>
+#include <sys/callout.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/if_types.h>
+#include <net/if_llc.h>
+
+#include <net/if_ether.h>
+#include <net/if_bridgevar.h>
+#include <net/if_media.h>
+
+#include <net/kpi_interface.h>
+
+/* BPDU message types */
+#define        BSTP_MSGTYPE_CFG        0x00            /* Configuration */
+#define        BSTP_MSGTYPE_TCN        0x80            /* Topology chg notification */
+
+/* BPDU flags */
+#define        BSTP_FLAG_TC            0x01            /* Topology change */
+#define        BSTP_FLAG_TCA           0x80            /* Topology change ack */
+
+#define        BSTP_MESSAGE_AGE_INCR   (1 * 256)       /* in 256ths of a second */
+#define        BSTP_TICK_VAL           (1 * 256)       /* in 256ths of a second */
+
+/*
+ * Because BPDU's do not make nicely aligned structures, two different
+ * declarations are used: bstp_?bpdu (wire representation, packed) and
+ * bstp_*_unit (internal, nicely aligned version).
+ */
+
+/* configuration bridge protocol data unit */
+struct bstp_cbpdu {
+       uint8_t         cbu_dsap;               /* LLC: destination sap */
+       uint8_t         cbu_ssap;               /* LLC: source sap */
+       uint8_t         cbu_ctl;                /* LLC: control */
+       uint16_t        cbu_protoid;            /* protocol id */
+       uint8_t         cbu_protover;           /* protocol version */
+       uint8_t         cbu_bpdutype;           /* message type */
+       uint8_t         cbu_flags;              /* flags (below) */
+
+       /* root id */
+       uint16_t        cbu_rootpri;            /* root priority */
+       uint8_t cbu_rootaddr[6];        /* root address */
+
+       uint32_t        cbu_rootpathcost;       /* root path cost */
+
+       /* bridge id */
+       uint16_t        cbu_bridgepri;          /* bridge priority */
+       uint8_t         cbu_bridgeaddr[6];      /* bridge address */
+
+       uint16_t        cbu_portid;             /* port id */
+       uint16_t        cbu_messageage;         /* current message age */
+       uint16_t        cbu_maxage;             /* maximum age */
+       uint16_t        cbu_hellotime;          /* hello time */
+       uint16_t        cbu_forwarddelay;       /* forwarding delay */
+} __attribute__((__packed__));
+
+/* topology change notification bridge protocol data unit */
+struct bstp_tbpdu {
+       uint8_t         tbu_dsap;               /* LLC: destination sap */
+       uint8_t         tbu_ssap;               /* LLC: source sap */
+       uint8_t         tbu_ctl;                /* LLC: control */
+       uint16_t        tbu_protoid;            /* protocol id */
+       uint8_t         tbu_protover;           /* protocol version */
+       uint8_t         tbu_bpdutype;           /* message type */
+} __attribute__((__packed__));
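As the comment above notes, the packed bstp_cbpdu/bstp_tbpdu structs are the wire image while the bstp_*_unit structs are the aligned internal form; moving between them means byte-swapping each multi-byte field, since BPDU fields are big-endian on the wire. A sketch of that conversion for a few config-BPDU fields, assuming the kernel's ntohs()/ntohl() byte-order macros:

static void
cbpdu_to_host(const struct bstp_cbpdu *wire,
    uint16_t *rootpri, uint32_t *rootcost, uint16_t *maxage)
{
        /* wire fields are big-endian; convert as they are copied out */
        *rootpri  = ntohs(wire->cbu_rootpri);
        *rootcost = ntohl(wire->cbu_rootpathcost);
        *maxage   = ntohs(wire->cbu_maxage);
}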
+
+const uint8_t bstp_etheraddr[] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
+
+void   bstp_initialize_port(struct bridge_softc *, struct bridge_iflist *);
+void   bstp_ifupdstatus(struct bridge_softc *, struct bridge_iflist *);
+void   bstp_enable_port(struct bridge_softc *, struct bridge_iflist *);
+void   bstp_disable_port(struct bridge_softc *, struct bridge_iflist *);
+void   bstp_enable_change_detection(struct bridge_iflist *);
+void   bstp_disable_change_detection(struct bridge_iflist *);
+int    bstp_root_bridge(struct bridge_softc *sc);
+int    bstp_supersedes_port_info(struct bridge_softc *,
+           struct bridge_iflist *, struct bstp_config_unit *);
+int    bstp_designated_port(struct bridge_softc *, struct bridge_iflist *);
+int    bstp_designated_for_some_port(struct bridge_softc *);
+void   bstp_transmit_config(struct bridge_softc *, struct bridge_iflist *);
+void   bstp_transmit_tcn(struct bridge_softc *);
+void   bstp_received_config_bpdu(struct bridge_softc *,
+           struct bridge_iflist *, struct bstp_config_unit *);
+void   bstp_received_tcn_bpdu(struct bridge_softc *, struct bridge_iflist *,
+           struct bstp_tcn_unit *);
+void   bstp_record_config_information(struct bridge_softc *,
+           struct bridge_iflist *, struct bstp_config_unit *);
+void   bstp_record_config_timeout_values(struct bridge_softc *,
+           struct bstp_config_unit *);
+void   bstp_config_bpdu_generation(struct bridge_softc *);
+void   bstp_send_config_bpdu(struct bridge_softc *, struct bridge_iflist *,
+           struct bstp_config_unit *);
+void   bstp_configuration_update(struct bridge_softc *);
+void   bstp_root_selection(struct bridge_softc *);
+void   bstp_designated_port_selection(struct bridge_softc *);
+void   bstp_become_designated_port(struct bridge_softc *,
+           struct bridge_iflist *);
+void   bstp_port_state_selection(struct bridge_softc *);
+void   bstp_make_forwarding(struct bridge_softc *, struct bridge_iflist *);
+void   bstp_make_blocking(struct bridge_softc *, struct bridge_iflist *);
+void   bstp_set_port_state(struct bridge_iflist *, uint8_t);
+void   bstp_set_bridge_priority(struct bridge_softc *, uint64_t);
+void   bstp_set_port_priority(struct bridge_softc *, struct bridge_iflist *,
+           uint16_t);
+void   bstp_set_path_cost(struct bridge_softc *, struct bridge_iflist *,
+           uint32_t);
+void   bstp_topology_change_detection(struct bridge_softc *);
+void   bstp_topology_change_acknowledged(struct bridge_softc *);
+void   bstp_acknowledge_topology_change(struct bridge_softc *,
+           struct bridge_iflist *);
+
+void   bstp_tick(void *);
+void   bstp_timer_start(struct bridge_timer *, uint16_t);
+void   bstp_timer_stop(struct bridge_timer *);
+int    bstp_timer_expired(struct bridge_timer *, uint16_t);
+
+void   bstp_hold_timer_expiry(struct bridge_softc *, struct bridge_iflist *);
+void   bstp_message_age_timer_expiry(struct bridge_softc *,
+           struct bridge_iflist *);
+void   bstp_forward_delay_timer_expiry(struct bridge_softc *,
+           struct bridge_iflist *);
+void   bstp_topology_change_timer_expiry(struct bridge_softc *);
+void   bstp_tcn_timer_expiry(struct bridge_softc *);
+void   bstp_hello_timer_expiry(struct bridge_softc *);
+
+void
+bstp_transmit_config(struct bridge_softc *sc, struct bridge_iflist *bif)
+{
+       if (bif->bif_hold_timer.active) {
+               bif->bif_config_pending = 1;
+               return;
+       }
+
+       bif->bif_config_bpdu.cu_message_type = BSTP_MSGTYPE_CFG;
+       bif->bif_config_bpdu.cu_rootid = sc->sc_designated_root;
+       bif->bif_config_bpdu.cu_root_path_cost = sc->sc_root_path_cost;
+       bif->bif_config_bpdu.cu_bridge_id = sc->sc_bridge_id;
+       bif->bif_config_bpdu.cu_port_id = bif->bif_port_id;
+
+       if (bstp_root_bridge(sc))
+               bif->bif_config_bpdu.cu_message_age = 0;
+       else
+               bif->bif_config_bpdu.cu_message_age =
+                   sc->sc_root_port->bif_message_age_timer.value +
+                   BSTP_MESSAGE_AGE_INCR;
+
+       bif->bif_config_bpdu.cu_max_age = sc->sc_max_age;
+       bif->bif_config_bpdu.cu_hello_time = sc->sc_hello_time;
+       bif->bif_config_bpdu.cu_forward_delay = sc->sc_forward_delay;
+       bif->bif_config_bpdu.cu_topology_change_acknowledgment
+           = bif->bif_topology_change_acknowledge;
+       bif->bif_config_bpdu.cu_topology_change = sc->sc_topology_change;
+
+       if (bif->bif_config_bpdu.cu_message_age < sc->sc_max_age) {
+               bif->bif_topology_change_acknowledge = 0;
+               bif->bif_config_pending = 0;
+               bstp_send_config_bpdu(sc, bif, &bif->bif_config_bpdu);
+               bstp_timer_start(&bif->bif_hold_timer, 0);
+       }
+}
+
+void
+bstp_send_config_bpdu(struct bridge_softc *sc, struct bridge_iflist *bif,
+    struct bstp_config_unit *cu)
+{
+       struct ifnet *ifp;
+       struct mbuf *m;
+       struct ether_header *eh;
+       struct bstp_cbpdu bpdu;
+
+       ifp = bif->bif_ifp;
+
+       if ((ifp->if_flags & IFF_RUNNING) == 0)
+               return;
+
+       MGETHDR(m, M_DONTWAIT, MT_DATA);
+       if (m == NULL)
+               return;
+
+       eh = mtod(m, struct ether_header *);
+
+       m->m_pkthdr.rcvif = ifp;
+       m->m_pkthdr.len = sizeof(*eh) + sizeof(bpdu);
+       m->m_len = m->m_pkthdr.len;
+
+       bpdu.cbu_ssap = bpdu.cbu_dsap = LLC_8021D_LSAP;
+       bpdu.cbu_ctl = LLC_UI;
+       bpdu.cbu_protoid = htons(0);
+       bpdu.cbu_protover = 0;
+       bpdu.cbu_bpdutype = cu->cu_message_type;
+       bpdu.cbu_flags = (cu->cu_topology_change ? BSTP_FLAG_TC : 0) |
+           (cu->cu_topology_change_acknowledgment ? BSTP_FLAG_TCA : 0);
+
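+       /*
+        * Bridge and root IDs are 64-bit values: the 16-bit priority sits
+        * in bits 63-48 and the 48-bit MAC address fills the low bits.  As
+        * an illustration, priority 0x8000 with MAC 00:0a:95:11:22:33
+        * encodes as 0x8000000a95112233; the stores below peel the bytes
+        * off from most- to least-significant.
+        */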
+       bpdu.cbu_rootpri = htons(cu->cu_rootid >> 48);
+       bpdu.cbu_rootaddr[0] = cu->cu_rootid >> 40;
+       bpdu.cbu_rootaddr[1] = cu->cu_rootid >> 32;
+       bpdu.cbu_rootaddr[2] = cu->cu_rootid >> 24;
+       bpdu.cbu_rootaddr[3] = cu->cu_rootid >> 16;
+       bpdu.cbu_rootaddr[4] = cu->cu_rootid >> 8;
+       bpdu.cbu_rootaddr[5] = cu->cu_rootid >> 0;
+
+       bpdu.cbu_rootpathcost = htonl(cu->cu_root_path_cost);
+
+       bpdu.cbu_bridgepri = htons(cu->cu_bridge_id >> 48);
+       bpdu.cbu_bridgeaddr[0] = cu->cu_bridge_id >> 40;
+       bpdu.cbu_bridgeaddr[1] = cu->cu_bridge_id >> 32;
+       bpdu.cbu_bridgeaddr[2] = cu->cu_bridge_id >> 24;
+       bpdu.cbu_bridgeaddr[3] = cu->cu_bridge_id >> 16;
+       bpdu.cbu_bridgeaddr[4] = cu->cu_bridge_id >> 8;
+       bpdu.cbu_bridgeaddr[5] = cu->cu_bridge_id >> 0;
+
+       bpdu.cbu_portid = htons(cu->cu_port_id);
+       bpdu.cbu_messageage = htons(cu->cu_message_age);
+       bpdu.cbu_maxage = htons(cu->cu_max_age);
+       bpdu.cbu_hellotime = htons(cu->cu_hello_time);
+       bpdu.cbu_forwarddelay = htons(cu->cu_forward_delay);
+
+       memcpy(eh->ether_shost, ifnet_lladdr(ifp), ETHER_ADDR_LEN);
+       memcpy(eh->ether_dhost, bstp_etheraddr, ETHER_ADDR_LEN);
+       eh->ether_type = htons(sizeof(bpdu));
+
+       memcpy(mtod(m, caddr_t) + sizeof(*eh), &bpdu, sizeof(bpdu));
+
+       bridge_enqueue(sc, ifp, m); // APPLE MODIFICATION - no flags param
+}
+
+int
+bstp_root_bridge(struct bridge_softc *sc)
+{
+       return (sc->sc_designated_root == sc->sc_bridge_id);
+}
+
+int
+bstp_supersedes_port_info(struct bridge_softc *sc, struct bridge_iflist *bif,
+    struct bstp_config_unit *cu)
+{
+       if (cu->cu_rootid < bif->bif_designated_root)
+               return (1);
+       if (cu->cu_rootid > bif->bif_designated_root)
+               return (0);
+
+       if (cu->cu_root_path_cost < bif->bif_designated_cost)
+               return (1);
+       if (cu->cu_root_path_cost > bif->bif_designated_cost)
+               return (0);
+
+       if (cu->cu_bridge_id < bif->bif_designated_bridge)
+               return (1);
+       if (cu->cu_bridge_id > bif->bif_designated_bridge)
+               return (0);
+
+       if (sc->sc_bridge_id != cu->cu_bridge_id)
+               return (1);
+       if (cu->cu_port_id <= bif->bif_designated_port)
+               return (1);
+       return (0);
+}
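+
+/*
+ * The comparison above is 802.1D's priority-vector ordering: a lower root
+ * ID wins, then a lower root path cost, then a lower designated bridge ID,
+ * with the port ID as the final tie-breaker.  "Supersedes" means the
+ * received BPDU should replace the information this port has recorded.
+ */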
+
+void
+bstp_record_config_information(__unused struct bridge_softc *sc,
+    struct bridge_iflist *bif, struct bstp_config_unit *cu)
+{
+       bif->bif_designated_root = cu->cu_rootid;
+       bif->bif_designated_cost = cu->cu_root_path_cost;
+       bif->bif_designated_bridge = cu->cu_bridge_id;
+       bif->bif_designated_port = cu->cu_port_id;
+       bstp_timer_start(&bif->bif_message_age_timer, cu->cu_message_age);
+}
+
+void
+bstp_record_config_timeout_values(struct bridge_softc *sc,
+    struct bstp_config_unit *config)
+{
+       sc->sc_max_age = config->cu_max_age;
+       sc->sc_hello_time = config->cu_hello_time;
+       sc->sc_forward_delay = config->cu_forward_delay;
+       sc->sc_topology_change = config->cu_topology_change;
+}
+
+void
+bstp_config_bpdu_generation(struct bridge_softc *sc)
+{
+       struct bridge_iflist *bif;
+
+       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+               if ((bif->bif_flags & IFBIF_STP) == 0)
+                       continue;
+               if (bstp_designated_port(sc, bif) &&
+                   (bif->bif_state != BSTP_IFSTATE_DISABLED))
+                       bstp_transmit_config(sc, bif);
+       }
+}
+
+int
+bstp_designated_port(struct bridge_softc *sc, struct bridge_iflist *bif)
+{
+       return ((bif->bif_designated_bridge == sc->sc_bridge_id)
+           && (bif->bif_designated_port == bif->bif_port_id));
+}
+
+void
+bstp_transmit_tcn(struct bridge_softc *sc)
+{
+       struct bstp_tbpdu bpdu;
+       struct bridge_iflist *bif = sc->sc_root_port;
+       struct ifnet *ifp;
+       struct ether_header *eh;
+       struct mbuf *m;
+
+       KASSERT(bif != NULL, "bstp_transmit_tcn bif NULL");
+       ifp = bif->bif_ifp;
+       if ((ifp->if_flags & IFF_RUNNING) == 0)
+               return;
+
+       MGETHDR(m, M_DONTWAIT, MT_DATA);
+       if (m == NULL)
+               return;
+
+       m->m_pkthdr.rcvif = ifp;
+       m->m_pkthdr.len = sizeof(*eh) + sizeof(bpdu);
+       m->m_len = m->m_pkthdr.len;
+
+       eh = mtod(m, struct ether_header *);
+
+       memcpy(eh->ether_shost, ifnet_lladdr(ifp), ETHER_ADDR_LEN);
+       memcpy(eh->ether_dhost, bstp_etheraddr, ETHER_ADDR_LEN);
+       eh->ether_type = htons(sizeof(bpdu));
+
+       bpdu.tbu_ssap = bpdu.tbu_dsap = LLC_8021D_LSAP;
+       bpdu.tbu_ctl = LLC_UI;
+       bpdu.tbu_protoid = 0;
+       bpdu.tbu_protover = 0;
+       bpdu.tbu_bpdutype = BSTP_MSGTYPE_TCN;
+
+       memcpy(mtod(m, caddr_t) + sizeof(*eh), &bpdu, sizeof(bpdu));
+
+       bridge_enqueue(sc, ifp, m); // APPLE MODIFICATION - no flags param
+}
+
+void
+bstp_configuration_update(struct bridge_softc *sc)
+{
+       bstp_root_selection(sc);
+       bstp_designated_port_selection(sc);
+}
+
+void
+bstp_root_selection(struct bridge_softc *sc)
+{
+       struct bridge_iflist *root_port = NULL, *bif;
+
+       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+               if ((bif->bif_flags & IFBIF_STP) == 0)
+                       continue;
+               if (bstp_designated_port(sc, bif))
+                       continue;
+               if (bif->bif_state == BSTP_IFSTATE_DISABLED)
+                       continue;
+               if (bif->bif_designated_root >= sc->sc_bridge_id)
+                       continue;
+               if (root_port == NULL)
+                       goto set_port;
+
+               if (bif->bif_designated_root < root_port->bif_designated_root)
+                       goto set_port;
+               if (bif->bif_designated_root > root_port->bif_designated_root)
+                       continue;
+
+               if ((bif->bif_designated_cost + bif->bif_path_cost) <
+                   (root_port->bif_designated_cost + root_port->bif_path_cost))
+                       goto set_port;
+               if ((bif->bif_designated_cost + bif->bif_path_cost) >
+                   (root_port->bif_designated_cost + root_port->bif_path_cost))
+                       continue;
+
+               if (bif->bif_designated_bridge <
+                   root_port->bif_designated_bridge)
+                       goto set_port;
+               if (bif->bif_designated_bridge >
+                   root_port->bif_designated_bridge)
+                       continue;
+
+               if (bif->bif_designated_port < root_port->bif_designated_port)
+                       goto set_port;
+               if (bif->bif_designated_port > root_port->bif_designated_port)
+                       continue;
+
+               if (bif->bif_port_id >= root_port->bif_port_id)
+                       continue;
+set_port:
+               root_port = bif;
+       }
+
+       sc->sc_root_port = root_port;
+       if (root_port == NULL) {
+               sc->sc_designated_root = sc->sc_bridge_id;
+               sc->sc_root_path_cost = 0;
+       } else {
+               sc->sc_designated_root = root_port->bif_designated_root;
+               sc->sc_root_path_cost = root_port->bif_designated_cost +
+                   root_port->bif_path_cost;
+       }
+}
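+
+/*
+ * Root selection scans every STP member port and keeps the one advertising
+ * the best (root ID, designated cost plus local path cost, designated
+ * bridge, designated port, own port ID) tuple; if no port beats our own
+ * bridge ID, this bridge is the root and the root path cost is zero.
+ */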
+
+void
+bstp_designated_port_selection(struct bridge_softc *sc)
+{
+       struct bridge_iflist *bif;
+
+       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+               if ((bif->bif_flags & IFBIF_STP) == 0)
+                       continue;
+               if (bstp_designated_port(sc, bif))
+                       goto designated;
+               if (bif->bif_designated_root != sc->sc_designated_root)
+                       goto designated;
+
+               if (sc->sc_root_path_cost < bif->bif_designated_cost)
+                       goto designated;
+               if (sc->sc_root_path_cost > bif->bif_designated_cost)
+                       continue;
+
+               if (sc->sc_bridge_id < bif->bif_designated_bridge)
+                       goto designated;
+               if (sc->sc_bridge_id > bif->bif_designated_bridge)
+                       continue;
+
+               if (bif->bif_port_id > bif->bif_designated_port)
+                       continue;
+designated:
+               bstp_become_designated_port(sc, bif);
+       }
+}
+
+void
+bstp_become_designated_port(struct bridge_softc *sc, struct bridge_iflist *bif)
+{
+       bif->bif_designated_root = sc->sc_designated_root;
+       bif->bif_designated_cost = sc->sc_root_path_cost;
+       bif->bif_designated_bridge = sc->sc_bridge_id;
+       bif->bif_designated_port = bif->bif_port_id;
+}
+
+void
+bstp_port_state_selection(struct bridge_softc *sc)
+{
+       struct bridge_iflist *bif;
+
+       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+               if ((bif->bif_flags & IFBIF_STP) == 0)
+                       continue;
+               if (bif == sc->sc_root_port) {
+                       bif->bif_config_pending = 0;
+                       bif->bif_topology_change_acknowledge = 0;
+                       bstp_make_forwarding(sc, bif);
+               } else if (bstp_designated_port(sc, bif)) {
+                       bstp_timer_stop(&bif->bif_message_age_timer);
+                       bstp_make_forwarding(sc, bif);
+               } else {
+                       bif->bif_config_pending = 0;
+                       bif->bif_topology_change_acknowledge = 0;
+                       bstp_make_blocking(sc, bif);
+               }
+       }
+}
+
+void
+bstp_make_forwarding(__unused struct bridge_softc *sc,
+    struct bridge_iflist *bif)
+{
+       if (bif->bif_state == BSTP_IFSTATE_BLOCKING) {
+               bstp_set_port_state(bif, BSTP_IFSTATE_LISTENING);
+               bstp_timer_start(&bif->bif_forward_delay_timer, 0);
+       }
+}
+
+void
+bstp_make_blocking(struct bridge_softc *sc, struct bridge_iflist *bif)
+{
+       if ((bif->bif_state != BSTP_IFSTATE_DISABLED) &&
+           (bif->bif_state != BSTP_IFSTATE_BLOCKING)) {
+               if ((bif->bif_state == BSTP_IFSTATE_FORWARDING) ||
+                   (bif->bif_state == BSTP_IFSTATE_LEARNING)) {
+                       if (bif->bif_change_detection_enabled) {
+                               bstp_topology_change_detection(sc);
+                       }
+               }
+               bstp_set_port_state(bif, BSTP_IFSTATE_BLOCKING);
+               bstp_timer_stop(&bif->bif_forward_delay_timer);
+       }
+}
+
+void
+bstp_set_port_state(struct bridge_iflist *bif, uint8_t state)
+{
+       bif->bif_state = state;
+}
+
+void
+bstp_topology_change_detection(struct bridge_softc *sc)
+{
+       if (bstp_root_bridge(sc)) {
+               sc->sc_topology_change = 1;
+               bstp_timer_start(&sc->sc_topology_change_timer, 0);
+       } else if (!sc->sc_topology_change_detected) {
+               bstp_transmit_tcn(sc);
+               bstp_timer_start(&sc->sc_tcn_timer, 0);
+       }
+       sc->sc_topology_change_detected = 1;
+}
+
+void
+bstp_topology_change_acknowledged(struct bridge_softc *sc)
+{
+       sc->sc_topology_change_detected = 0;
+       bstp_timer_stop(&sc->sc_tcn_timer);
+}
+
+void
+bstp_acknowledge_topology_change(struct bridge_softc *sc,
+    struct bridge_iflist *bif)
+{
+       bif->bif_topology_change_acknowledge = 1;
+       bstp_transmit_config(sc, bif);
+}
+
+__private_extern__ struct mbuf *
+bstp_input(struct bridge_softc *sc, struct ifnet *ifp, struct mbuf *m)
+{
+       struct bridge_iflist *bif = NULL;
+       struct ether_header *eh;
+       struct bstp_tbpdu tpdu;
+       struct bstp_cbpdu cpdu;
+       struct bstp_config_unit cu;
+       struct bstp_tcn_unit tu;
+       uint16_t len;
+
+       eh = mtod(m, struct ether_header *);
+
+       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+               if ((bif->bif_flags & IFBIF_STP) == 0)
+                       continue;
+               if (bif->bif_ifp == ifp)
+                       break;
+       }
+       if (bif == NULL)
+               goto out;
+
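+       /*
+        * BPDUs travel in 802.3/LLC frames, so eh->ether_type carries the
+        * frame length (a value below 0x600) rather than an Ethertype; the
+        * transmit side fills it with htons(sizeof(bpdu)).
+        */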
+       len = ntohs(eh->ether_type);
+       if (len < sizeof(tpdu))
+               goto out;
+
+       m_adj(m, ETHER_HDR_LEN);
+
+       if (m->m_pkthdr.len > len)
+               m_adj(m, len - m->m_pkthdr.len);
+       if ((size_t)m->m_len < sizeof(tpdu) &&
+           (m = m_pullup(m, sizeof(tpdu))) == NULL)
+               goto out;
+
+       memcpy(&tpdu, mtod(m, caddr_t), sizeof(tpdu));
+
+       if (tpdu.tbu_dsap != LLC_8021D_LSAP ||
+           tpdu.tbu_ssap != LLC_8021D_LSAP ||
+           tpdu.tbu_ctl != LLC_UI)
+               goto out;
+       if (tpdu.tbu_protoid != 0 || tpdu.tbu_protover != 0)
+               goto out;
+
+       switch (tpdu.tbu_bpdutype) {
+       case BSTP_MSGTYPE_TCN:
+               tu.tu_message_type = tpdu.tbu_bpdutype;
+               bstp_received_tcn_bpdu(sc, bif, &tu);
+               break;
+       case BSTP_MSGTYPE_CFG:
+               if ((size_t)m->m_len < sizeof(cpdu) &&
+                   (m = m_pullup(m, sizeof(cpdu))) == NULL)
+                       goto out;
+               memcpy(&cpdu, mtod(m, caddr_t), sizeof(cpdu));
+
+               cu.cu_rootid =
+                   (((uint64_t)ntohs(cpdu.cbu_rootpri)) << 48) |
+                   (((uint64_t)cpdu.cbu_rootaddr[0]) << 40) |
+                   (((uint64_t)cpdu.cbu_rootaddr[1]) << 32) |
+                   (((uint64_t)cpdu.cbu_rootaddr[2]) << 24) |
+                   (((uint64_t)cpdu.cbu_rootaddr[3]) << 16) |
+                   (((uint64_t)cpdu.cbu_rootaddr[4]) << 8) |
+                   (((uint64_t)cpdu.cbu_rootaddr[5]) << 0);
+
+               cu.cu_bridge_id =
+                   (((uint64_t)ntohs(cpdu.cbu_bridgepri)) << 48) |
+                   (((uint64_t)cpdu.cbu_bridgeaddr[0]) << 40) |
+                   (((uint64_t)cpdu.cbu_bridgeaddr[1]) << 32) |
+                   (((uint64_t)cpdu.cbu_bridgeaddr[2]) << 24) |
+                   (((uint64_t)cpdu.cbu_bridgeaddr[3]) << 16) |
+                   (((uint64_t)cpdu.cbu_bridgeaddr[4]) << 8) |
+                   (((uint64_t)cpdu.cbu_bridgeaddr[5]) << 0);
+
+               cu.cu_root_path_cost = ntohl(cpdu.cbu_rootpathcost);
+               cu.cu_message_age = ntohs(cpdu.cbu_messageage);
+               cu.cu_max_age = ntohs(cpdu.cbu_maxage);
+               cu.cu_hello_time = ntohs(cpdu.cbu_hellotime);
+               cu.cu_forward_delay = ntohs(cpdu.cbu_forwarddelay);
+               cu.cu_port_id = ntohs(cpdu.cbu_portid);
+               cu.cu_message_type = cpdu.cbu_bpdutype;
+               cu.cu_topology_change_acknowledgment =
+                   (cpdu.cbu_flags & BSTP_FLAG_TCA) ? 1 : 0;
+               cu.cu_topology_change =
+                   (cpdu.cbu_flags & BSTP_FLAG_TC) ? 1 : 0;
+               bstp_received_config_bpdu(sc, bif, &cu);
+               break;
+       default:
+               goto out;
+       }
+
+ out:
+       if (m)
+               m_freem(m);
+       return (NULL);
+}
+
+void
+bstp_received_config_bpdu(struct bridge_softc *sc, struct bridge_iflist *bif,
+    struct bstp_config_unit *cu)
+{
+       int root;
+
+       root = bstp_root_bridge(sc);
+
+       if (bif->bif_state != BSTP_IFSTATE_DISABLED) {
+               if (bstp_supersedes_port_info(sc, bif, cu)) {
+                       bstp_record_config_information(sc, bif, cu);
+                       bstp_configuration_update(sc);
+                       bstp_port_state_selection(sc);
+
+                       if ((bstp_root_bridge(sc) == 0) && root) {
+                               bstp_timer_stop(&sc->sc_hello_timer);
+
+                               if (sc->sc_topology_change_detected) {
+                                       bstp_timer_stop(
+                                           &sc->sc_topology_change_timer);
+                                       bstp_transmit_tcn(sc);
+                                       bstp_timer_start(&sc->sc_tcn_timer, 0);
+                               }
+                       }
+
+                       if (bif == sc->sc_root_port) {
+                               bstp_record_config_timeout_values(sc, cu);
+                               bstp_config_bpdu_generation(sc);
+
+                               if (cu->cu_topology_change_acknowledgment)
+                                       bstp_topology_change_acknowledged(sc);
+                       }
+               } else if (bstp_designated_port(sc, bif))
+                       bstp_transmit_config(sc, bif);
+       }
+}
+
+void
+bstp_received_tcn_bpdu(struct bridge_softc *sc, struct bridge_iflist *bif,
+    __unused struct bstp_tcn_unit *tcn)
+{
+       if (bif->bif_state != BSTP_IFSTATE_DISABLED &&
+           bstp_designated_port(sc, bif)) {
+               bstp_topology_change_detection(sc);
+               bstp_acknowledge_topology_change(sc, bif);
+       }
+}
+
+void
+bstp_hello_timer_expiry(struct bridge_softc *sc)
+{
+       bstp_config_bpdu_generation(sc);
+       bstp_timer_start(&sc->sc_hello_timer, 0);
+}
+
+void
+bstp_message_age_timer_expiry(struct bridge_softc *sc,
+    struct bridge_iflist *bif)
+{
+       int root;
+
+       root = bstp_root_bridge(sc);
+       bstp_become_designated_port(sc, bif);
+       bstp_configuration_update(sc);
+       bstp_port_state_selection(sc);
+
+       if ((bstp_root_bridge(sc)) && (root == 0)) {
+               sc->sc_max_age = sc->sc_bridge_max_age;
+               sc->sc_hello_time = sc->sc_bridge_hello_time;
+               sc->sc_forward_delay = sc->sc_bridge_forward_delay;
+
+               bstp_topology_change_detection(sc);
+               bstp_timer_stop(&sc->sc_tcn_timer);
+               bstp_config_bpdu_generation(sc);
+               bstp_timer_start(&sc->sc_hello_timer, 0);
+       }
+}
+
+void
+bstp_forward_delay_timer_expiry(struct bridge_softc *sc,
+    struct bridge_iflist *bif)
+{
+       if (bif->bif_state == BSTP_IFSTATE_LISTENING) {
+               bstp_set_port_state(bif, BSTP_IFSTATE_LEARNING);
+               bstp_timer_start(&bif->bif_forward_delay_timer, 0);
+       } else if (bif->bif_state == BSTP_IFSTATE_LEARNING) {
+               bstp_set_port_state(bif, BSTP_IFSTATE_FORWARDING);
+               if (bstp_designated_for_some_port(sc) &&
+                   bif->bif_change_detection_enabled)
+                       bstp_topology_change_detection(sc);
+       }
+}
+
+int
+bstp_designated_for_some_port(struct bridge_softc *sc)
+{
+       struct bridge_iflist *bif;
+
+       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+               if ((bif->bif_flags & IFBIF_STP) == 0)
+                       continue;
+               if (bif->bif_designated_bridge == sc->sc_bridge_id)
+                       return (1);
+       }
+       return (0);
+}
+
+void
+bstp_tcn_timer_expiry(struct bridge_softc *sc)
+{
+       bstp_transmit_tcn(sc);
+       bstp_timer_start(&sc->sc_tcn_timer, 0);
+}
+
+void
+bstp_topology_change_timer_expiry(struct bridge_softc *sc)
+{
+       sc->sc_topology_change_detected = 0;
+       sc->sc_topology_change = 0;
+}
+
+void
+bstp_hold_timer_expiry(struct bridge_softc *sc, struct bridge_iflist *bif)
+{
+       if (bif->bif_config_pending)
+               bstp_transmit_config(sc, bif);
+}
+
+__private_extern__ void
+bstp_initialization(struct bridge_softc *sc)
+{
+       struct bridge_iflist *bif, *mif;
+       struct timespec ts;
+       unsigned char *lladdr;
+
+       lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_OWNED);
+
+       mif = NULL;
+       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+               if ((bif->bif_flags & IFBIF_STP) == 0)
+                       continue;
+               if (bif->bif_ifp->if_type != IFT_ETHER)
+                       continue;
+               bif->bif_port_id = (bif->bif_priority << 8) |
+                   (bif->bif_ifp->if_index & 0xff);
+
+               if (mif == NULL) {
+                       mif = bif;
+                       continue;
+               }
+               if (memcmp(ifnet_lladdr(bif->bif_ifp),
+                   ifnet_lladdr(mif->bif_ifp), ETHER_ADDR_LEN) < 0) {
+                       mif = bif;
+                       continue;
+               }
+       }
+       if (mif == NULL) {
+               bstp_stop(sc);
+               return;
+       }
+
+       lladdr = ifnet_lladdr(mif->bif_ifp);
+       sc->sc_bridge_id =
+           (((uint64_t)sc->sc_bridge_priority) << 48) |
+           (((uint64_t)lladdr[0]) << 40) |
+           (((uint64_t)lladdr[1]) << 32) |
+           (((uint64_t)lladdr[2]) << 24) |
+           (((uint64_t)lladdr[3]) << 16) |
+           (((uint64_t)lladdr[4]) << 8) |
+           (((uint64_t)lladdr[5]));
+
+       sc->sc_designated_root = sc->sc_bridge_id;
+       sc->sc_root_path_cost = 0;
+       sc->sc_root_port = NULL;
+
+       sc->sc_max_age = sc->sc_bridge_max_age;
+       sc->sc_hello_time = sc->sc_bridge_hello_time;
+       sc->sc_forward_delay = sc->sc_bridge_forward_delay;
+       sc->sc_topology_change_detected = 0;
+       sc->sc_topology_change = 0;
+       bstp_timer_stop(&sc->sc_tcn_timer);
+       bstp_timer_stop(&sc->sc_topology_change_timer);
+
+       bsd_untimeout(bstp_tick, sc);
+       ts.tv_sec = 1;
+       ts.tv_nsec = 0;
+       bsd_timeout(bstp_tick, sc, &ts);
+
+       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+               if (bif->bif_flags & IFBIF_STP)
+                       bstp_enable_port(sc, bif);
+               else
+                       bstp_disable_port(sc, bif);
+       }
+
+       bstp_port_state_selection(sc);
+       bstp_config_bpdu_generation(sc);
+       bstp_timer_start(&sc->sc_hello_timer, 0);
+}
+
+__private_extern__ void
+bstp_stop(struct bridge_softc *sc)
+{
+       struct bridge_iflist *bif;
+
+       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+               bstp_set_port_state(bif, BSTP_IFSTATE_DISABLED);
+               bstp_timer_stop(&bif->bif_hold_timer);
+               bstp_timer_stop(&bif->bif_message_age_timer);
+               bstp_timer_stop(&bif->bif_forward_delay_timer);
+       }
+
+       bsd_untimeout(bstp_tick, sc);
+
+       bstp_timer_stop(&sc->sc_topology_change_timer);
+       bstp_timer_stop(&sc->sc_tcn_timer);
+       bstp_timer_stop(&sc->sc_hello_timer);
+}
+
+void
+bstp_initialize_port(struct bridge_softc *sc, struct bridge_iflist *bif)
+{
+       bstp_become_designated_port(sc, bif);
+       bstp_set_port_state(bif, BSTP_IFSTATE_BLOCKING);
+       bif->bif_topology_change_acknowledge = 0;
+       bif->bif_config_pending = 0;
+       bif->bif_change_detection_enabled = 1;
+       bstp_timer_stop(&bif->bif_message_age_timer);
+       bstp_timer_stop(&bif->bif_forward_delay_timer);
+       bstp_timer_stop(&bif->bif_hold_timer);
+}
+
+void
+bstp_enable_port(struct bridge_softc *sc, struct bridge_iflist *bif)
+{
+       bstp_initialize_port(sc, bif);
+       bstp_port_state_selection(sc);
+}
+
+void
+bstp_disable_port(struct bridge_softc *sc, struct bridge_iflist *bif)
+{
+       int root;
+
+       root = bstp_root_bridge(sc);
+       bstp_become_designated_port(sc, bif);
+       bstp_set_port_state(bif, BSTP_IFSTATE_DISABLED);
+       bif->bif_topology_change_acknowledge = 0;
+       bif->bif_config_pending = 0;
+       bstp_timer_stop(&bif->bif_message_age_timer);
+       bstp_timer_stop(&bif->bif_forward_delay_timer);
+       bstp_configuration_update(sc);
+       bstp_port_state_selection(sc);
+
+       if (bstp_root_bridge(sc) && (root == 0)) {
+               sc->sc_max_age = sc->sc_bridge_max_age;
+               sc->sc_hello_time = sc->sc_bridge_hello_time;
+               sc->sc_forward_delay = sc->sc_bridge_forward_delay;
+
+               bstp_topology_change_detection(sc);
+               bstp_timer_stop(&sc->sc_tcn_timer);
+               bstp_config_bpdu_generation(sc);
+               bstp_timer_start(&sc->sc_hello_timer, 0);
+       }
+}
+
+void
+bstp_set_bridge_priority(struct bridge_softc *sc, uint64_t new_bridge_id)
+{
+       struct bridge_iflist *bif;
+       int root;
+
+       root = bstp_root_bridge(sc);
+
+       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+               if ((bif->bif_flags & IFBIF_STP) == 0)
+                       continue;
+               if (bstp_designated_port(sc, bif))
+                       bif->bif_designated_bridge = new_bridge_id;
+       }
+
+       sc->sc_bridge_id = new_bridge_id;
+
+       bstp_configuration_update(sc);
+       bstp_port_state_selection(sc);
+
+       if (bstp_root_bridge(sc) && (root == 0)) {
+               sc->sc_max_age = sc->sc_bridge_max_age;
+               sc->sc_hello_time = sc->sc_bridge_hello_time;
+               sc->sc_forward_delay = sc->sc_bridge_forward_delay;
+
+               bstp_topology_change_detection(sc);
+               bstp_timer_stop(&sc->sc_tcn_timer);
+               bstp_config_bpdu_generation(sc);
+               bstp_timer_start(&sc->sc_hello_timer, 0);
+       }
+}
+
+void
+bstp_set_port_priority(struct bridge_softc *sc, struct bridge_iflist *bif,
+    uint16_t new_port_id)
+{
+       if (bstp_designated_port(sc, bif))
+               bif->bif_designated_port = new_port_id;
+
+       bif->bif_port_id = new_port_id;
+
+       if ((sc->sc_bridge_id == bif->bif_designated_bridge) &&
+           (bif->bif_port_id < bif->bif_designated_port)) {
+               bstp_become_designated_port(sc, bif);
+               bstp_port_state_selection(sc);
+       }
+}
+
+void
+bstp_set_path_cost(struct bridge_softc *sc, struct bridge_iflist *bif,
+    uint32_t path_cost)
+{
+       bif->bif_path_cost = path_cost;
+       bstp_configuration_update(sc);
+       bstp_port_state_selection(sc);
+}
+
+void
+bstp_enable_change_detection(struct bridge_iflist *bif)
+{
+       bif->bif_change_detection_enabled = 1;
+}
+
+void
+bstp_disable_change_detection(struct bridge_iflist *bif)
+{
+       bif->bif_change_detection_enabled = 0;
+}
+
+void
+bstp_ifupdstatus(struct bridge_softc *sc, struct bridge_iflist *bif)
+{
+       struct ifnet *ifp = bif->bif_ifp;
+       struct ifmediareq ifmr;
+
+       if ((ifnet_flags(ifp) & IFF_UP)) {
+               bzero(&ifmr, sizeof(ifmr));
+               if (ifnet_ioctl(ifp, 0, SIOCGIFMEDIA, &ifmr) == 0) {
+                       // enable the port when the link is up, or its state is unknown
+                       if ((ifmr.ifm_status & IFM_ACTIVE) || !(ifmr.ifm_status & IFM_AVALID)) {
+                               if (bif->bif_state == BSTP_IFSTATE_DISABLED)
+                                       bstp_enable_port(sc, bif);
+                       } else {
+                               if (bif->bif_state != BSTP_IFSTATE_DISABLED)
+                                       bstp_disable_port(sc, bif);
+                       }
+               }
+               return;
+       }
+
+       if (bif->bif_state != BSTP_IFSTATE_DISABLED)
+               bstp_disable_port(sc, bif);
+}
+
+void
+bstp_tick(void *arg)
+{
+       struct bridge_softc *sc = arg;
+       struct bridge_iflist *bif;
+       struct timespec ts;
+
+       lck_mtx_lock(sc->sc_mtx);
+
+       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+               if ((bif->bif_flags & IFBIF_STP) == 0)
+                       continue;
+               /*
+                * XXX This can cause a lag between "link goes away"
+                * XXX and "spanning tree gets updated".  We need
+                * XXX some sort of callback from the link state
+                * XXX update code to kick the spanning tree.
+                * XXX --thorpej@NetBSD.org
+                */
+               bstp_ifupdstatus(sc, bif);
+       }
+
+       if (bstp_timer_expired(&sc->sc_hello_timer, sc->sc_hello_time))
+               bstp_hello_timer_expiry(sc);
+
+       if (bstp_timer_expired(&sc->sc_tcn_timer, sc->sc_bridge_hello_time))
+               bstp_tcn_timer_expiry(sc);
+
+       if (bstp_timer_expired(&sc->sc_topology_change_timer,
+           sc->sc_topology_change_time))
+               bstp_topology_change_timer_expiry(sc);
+
+       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+               if ((bif->bif_flags & IFBIF_STP) == 0)
+                       continue;
+               if (bstp_timer_expired(&bif->bif_message_age_timer,
+                   sc->sc_max_age))
+                       bstp_message_age_timer_expiry(sc, bif);
+       }
+
+       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+               if ((bif->bif_flags & IFBIF_STP) == 0)
+                       continue;
+               if (bstp_timer_expired(&bif->bif_forward_delay_timer,
+                   sc->sc_forward_delay))
+                       bstp_forward_delay_timer_expiry(sc, bif);
+
+               if (bstp_timer_expired(&bif->bif_hold_timer,
+                   sc->sc_hold_time))
+                       bstp_hold_timer_expiry(sc, bif);
+       }
+
+       lck_mtx_unlock(sc->sc_mtx);
+
+       /* APPLE MODIFICATION - bridge changes */
+       if (ifnet_flags(sc->sc_if) & IFF_RUNNING) {
+               ts.tv_sec = 1;
+               ts.tv_nsec = 0;
+               bsd_timeout(bstp_tick, sc, &ts);
+       }
+}
+
+void
+bstp_timer_start(struct bridge_timer *t, uint16_t v)
+{
+       t->value = v;
+       t->active = 1;
+}
+
+void
+bstp_timer_stop(struct bridge_timer *t)
+{
+       t->value = 0;
+       t->active = 0;
+}
+
+int
+bstp_timer_expired(struct bridge_timer *t, uint16_t v)
+{
+       if (t->active == 0)
+               return (0);
+       t->value += BSTP_TICK_VAL;
+       if (t->value >= v) {
+               bstp_timer_stop(t);
+               return (1);
+       }
+       return (0);
+}
index e82208b12ef23f845d1d71eaddf2ee394d18f18f..254d94b779984ea7990d374d845c3ee1c595a49a 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 1999-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -1003,6 +1003,14 @@ dlil_interface_filters_input(struct ifnet * ifp, struct mbuf * * m_p,
                        }
                }
        }
+
+       /*
+        * Strip away the M_PROTO1 bit before sending the packet up the
+        * stack; the flag is meant to stay local to a subsystem (in this
+        * case if_bridge, which owns M_PROTO1).
+        */
+       if (*m_p != NULL)
+               (*m_p)->m_flags &= ~M_PROTO1;
+
        return (0);
 }
 
@@ -1350,28 +1358,6 @@ preout_again:
                        }
                }
        
-#if BRIDGE
-               /* !!!LOCKING!!!
-                *
-                * Need to consider how to handle this.
-                * Also note that return should be a goto cleanup
-                */
-               broken-locking
-               if (do_bridge) {
-                       struct mbuf *m0 = m;
-                       struct ether_header *eh = mtod(m, struct ether_header *);
-                       
-                       if (m->m_pkthdr.rcvif)
-                               m->m_pkthdr.rcvif = NULL;
-                       ifp = bridge_dst_lookup(eh);
-                       bdg_forward(&m0, ifp);
-                       if (m0)
-                               m_freem(m0);
-                       
-                       return 0 - should be goto cleanup?
-               }
-#endif
-
                /* 
                 * Let interface filters (if any) do their thing ...
                 */
@@ -1389,6 +1375,11 @@ preout_again:
                                }
                        }
                }
+               /*
+                * Strip away the M_PROTO1 bit before handing the packet to
+                * the driver, since the driver may use that flag itself.
+                */
+               m->m_flags &= ~M_PROTO1;
                
                /*
                 * Finally, call the driver.
@@ -1559,28 +1550,6 @@ preout_again:
                                m->m_pkthdr.rcvif = NULL;
                }
        
-#if BRIDGE
-               /* !!!LOCKING!!!
-                *
-                * Need to consider how to handle this.
-                * Also note that return should be a goto cleanup
-                */
-               broken-locking
-               if (do_bridge) {
-                       struct mbuf *m0 = m;
-                       struct ether_header *eh = mtod(m, struct ether_header *);
-                       
-                       if (m->m_pkthdr.rcvif)
-                               m->m_pkthdr.rcvif = NULL;
-                       ifp = bridge_dst_lookup(eh);
-                       bdg_forward(&m0, ifp);
-                       if (m0)
-                               m_freem(m0);
-                       
-                       return 0 - should be goto cleanup?
-               }
-#endif
-
                /* 
                 * Let interface filters (if any) do their thing ...
                 */
@@ -1599,6 +1568,12 @@ preout_again:
                        }
                }
 
+               /*
+                * Strip away the M_PROTO1 bit before handing the packet to
+                * the driver, since the driver may use that flag itself.
+                */
+               m->m_flags &= ~M_PROTO1;
+
                /*
                 * If the underlying interface is not capable of handling a
                 * packet whose data portion spans across physically disjoint
index 9ae109b852ae96aaf8d05456215d7da07e6cd0fa..1adcbe27e9309569ea904d3057f70e1324b5d9d5 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000,2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -88,10 +88,6 @@ extern struct ifqueue atalkintrq;
 #endif
 
 
-#if BRIDGE
-#include <net/bridge.h>
-#endif
-
 /* #include "vlan.h" */
 #if NVLAN > 0
 #include <net/if_vlan_var.h>
index 42e0a67a7f24f091e7865d1aa168623fa2eeb35c..a6ec5b2c53f02b8084347086fb39dcf7f170e340 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -98,6 +98,9 @@
 #include <sys/socketvar.h>
 #include <net/if_vlan_var.h>
 #include <net/if_bond_var.h>
+#if IF_BRIDGE
+#include <net/if_bridgevar.h>
+#endif
 
 #include <net/dlil.h>
 
@@ -113,10 +116,6 @@ extern struct ifqueue atalkintrq;
 #endif
 
 
-#if BRIDGE
-#include <net/bridge.h>
-#endif
-
 #define memcpy(x,y,z)  bcopy(y, x, z)
 
 
@@ -636,6 +635,9 @@ __private_extern__ int ether_family_init(void)
 #if BOND
        bond_family_init();
 #endif /* BOND */
+#if IF_BRIDGE
+       bridgeattach(0);
+#endif
 
  done:
 
index 52fd3922958f18f5ddc3f67218748247e3e76bf7..371cccfd6c0c9718140468bb8daf7ff946f64a0f 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 extern struct ifqueue pkintrq;
 #endif
 
-
-#if BRIDGE
-#include <net/bridge.h>
-#endif
-
 /* #include "vlan.h" */
 #if NVLAN > 0
 #include <net/if_vlan_var.h>
index 177631c4b5a4333369663f890c922a8da1279964..422866e737cc91fd89e45cd04af7f59b1912590d 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 
 #include <net/dlil.h>
 
-#if BRIDGE
-#include <net/bridge.h>
-#endif
-
 /* #include "vlan.h" */
 #if NVLAN > 0
 #include <net/if_vlan_var.h>
index efbf23c0f126f09293c64a5c21e0956bced174af..00b7fa5fb8358c6a6c56c7bae62c05cbc68f2668 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000,2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -95,6 +95,8 @@ struct        ether_addr {
 #define ETHERTYPE_REVARP       0x8035  /* reverse Addr. resolution protocol */
 #define        ETHERTYPE_VLAN          0x8100  /* IEEE 802.1Q VLAN tagging */
 #define ETHERTYPE_IPV6         0x86dd  /* IPv6 */
+#define ETHERTYPE_PAE          0x888e  /* EAPOL PAE/802.1x */
+#define ETHERTYPE_RSN_PREAUTH  0x88c7  /* 802.11i / RSN Pre-Authentication */
 #define        ETHERTYPE_LOOPBACK      0x9000  /* used to test interfaces */
 /* XXX - add more useful types here */
 
@@ -119,6 +121,9 @@ struct      ether_addr *ether_aton(const char *);
 #ifdef BSD_KERNEL_PRIVATE
 extern u_char  etherbroadcastaddr[ETHER_ADDR_LEN];
 #endif
+
+#define ETHER_IS_MULTICAST(addr) (*(addr) & 0x01) /* is address mcast/bcast? */
+
 #endif /* KERNEL_PRIVATE */
 
 #ifndef KERNEL
index 20f3600376a89c98b9b4970864ef9769f98b1a40..229eb134f564601af1f22e6c1e4e211cbcef3b04 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #endif
 
 #ifdef KERNEL_PRIVATE
+#define         IF_MAXUNIT      0x7fff  /* historical value */
+
 struct if_clonereq {
        int     ifcr_total;             /* total cloners (out) */
        int     ifcr_count;             /* room for this many in user buffer */
@@ -406,6 +408,34 @@ struct ifmediareq32 {
 #pragma pack()
 #endif /* KERNEL_PRIVATE */
 
+
+#pragma pack(4)
+struct  ifdrv {
+       char            ifd_name[IFNAMSIZ];     /* if name, e.g. "en0" */
+       unsigned long   ifd_cmd;
+       size_t          ifd_len;
+       void            *ifd_data;
+};
+#pragma pack()
+
+#ifdef KERNEL_PRIVATE
+#pragma pack(4)
+struct ifdrv32 {
+       char            ifd_name[IFNAMSIZ];     /* if name, e.g. "en0" */
+       u_int32_t       ifd_cmd;
+       u_int32_t       ifd_len;
+       user32_addr_t   ifd_data;
+};
+
+struct  ifdrv64 {
+       char            ifd_name[IFNAMSIZ];     /* if name, e.g. "en0" */
+       u_int64_t       ifd_cmd;
+       u_int64_t       ifd_len;
+       user64_addr_t   ifd_data;
+};
+#pragma pack()
+#endif /* KERNEL_PRIVATE */
+
 /* 
  * Structure used to retrieve aux status data from interfaces.
  * Kernel suppliers to this interface should respect the formatting
diff --git a/bsd/net/if_bridge.c b/bsd/net/if_bridge.c
new file mode 100644 (file)
index 0000000..acce8fa
--- /dev/null
@@ -0,0 +1,3847 @@
+/*
+ * Copyright (c) 2004-2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*     $apfw: Revision 1.19  2008/10/24 02:34:06  cbzimmer Exp $       */
+/*     $NetBSD: if_bridge.c,v 1.46 2006/11/23 04:07:07 rpaulo Exp $    */
+
+/*
+ * Copyright 2001 Wasabi Systems, Inc.
+ * All rights reserved.
+ *
+ * Written by Jason R. Thorpe for Wasabi Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *     must display the following acknowledgement:
+ *     This product includes software developed for the NetBSD Project by
+ *     Wasabi Systems, Inc.
+ * 4. The name of Wasabi Systems, Inc. may not be used to endorse
+ *     or promote products derived from this software without specific prior
+ *     written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *     must display the following acknowledgement:
+ *     This product includes software developed by Jason L. Wright
+ * 4. The name of the author may not be used to endorse or promote products
+ *     derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp
+ */
+
+/*
+ * Network interface bridge support.
+ *
+ * TODO:
+ *
+ *     - Currently only supports Ethernet-like interfaces (Ethernet,
+ *       802.11, VLANs on Ethernet, etc.)  Figure out a nice way
+ *       to bridge other types of interfaces (FDDI-FDDI, and maybe
+ *       consider heterogeneous bridges).
+ */
+
+#include <sys/cdefs.h>
+//_KERNEL_RCSID(0, "$NetBSD: if_bridge.c,v 1.46 2006/11/23 04:07:07 rpaulo Exp $");
+
+//#include "opt_bridge_ipf.h"
+//#include "opt_inet.h"
+//#include "opt_pfil_hooks.h"
+//#include "opt_wlan.h"        /* APPLE MODIFICATION <cbz@apple.com> - Proxy STA support */
+//#include "bpfilter.h"
+//#include "gif.h" // APPLE MODIFICATION - add gif support
+
+#define BRIDGE_DEBUG 0
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/mbuf.h>
+#include <sys/queue.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+//#include <sys/pool.h>
+#include <sys/kauth.h>
+#include <sys/random.h>
+#include <sys/kern_event.h>
+#include <sys/sysctl.h>
+
+#include <libkern/libkern.h>
+
+#include <kern/zalloc.h>
+
+#if NBPFILTER > 0
+#include <net/bpf.h>
+#endif
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/if_types.h>
+#include <net/if_llc.h>
+
+#include <net/if_ether.h>
+#include <net/if_bridgevar.h>
+#include <net/dlil.h>
+
+#include <net/kpi_interfacefilter.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#include <netinet6/in6_var.h>
+#include <netinet6/ip6_var.h>
+#endif
+
+#if BRIDGE_DEBUG
+#define static __private_extern__
+#endif
+
+extern void dlil_input_packet_list(struct ifnet *, struct mbuf *);
+
+/*
+ * Size of the route hash table.  Must be a power of two.
+ */
+/* APPLE MODIFICATION - per Wasabi performance improvement, change the hash table size */
+#if 0
+#ifndef BRIDGE_RTHASH_SIZE
+#define        BRIDGE_RTHASH_SIZE              1024
+#endif
+#else
+#ifndef BRIDGE_RTHASH_SIZE
+#define        BRIDGE_RTHASH_SIZE              256
+#endif
+#endif
+
+/* APPLE MODIFICATION - support for HW checksums */
+#if APPLE_BRIDGE_HWCKSUM_SUPPORT
+#include <netinet/udp.h>
+#include <netinet/tcp.h>
+#endif
+
+#define        BRIDGE_RTHASH_MASK              (BRIDGE_RTHASH_SIZE - 1)
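+
+/*
+ * Keeping the table size a power of two lets a bucket be selected with a
+ * cheap mask instead of a modulo, roughly:
+ *
+ *     bucket = &sc->sc_rthash[hash & BRIDGE_RTHASH_MASK];
+ */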
+
+//#include "carp.h"
+#if NCARP > 0
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+#include <netinet/ip_carp.h>
+#endif
+
+/*
+ * Maximum number of addresses to cache.
+ */
+#ifndef BRIDGE_RTABLE_MAX
+#define        BRIDGE_RTABLE_MAX               100
+#endif
+
+/* APPLE MODIFICATION <cbz@apple.com> - add support for Proxy STA */
+#if IEEE80211_PROXYSTA
+/*
+ * Maximum (additional to maxcache) number of proxysta addresses to cache.
+ */
+#ifndef BRIDGE_RTABLE_MAX_PROXYSTA
+#define        BRIDGE_RTABLE_MAX_PROXYSTA              16
+#endif
+#endif
+
+/*
+ * Spanning tree defaults.
+ */
+#define        BSTP_DEFAULT_MAX_AGE            (20 * 256)
+#define        BSTP_DEFAULT_HELLO_TIME         (2 * 256)
+#define        BSTP_DEFAULT_FORWARD_DELAY      (15 * 256)
+#define        BSTP_DEFAULT_HOLD_TIME          (1 * 256)
+#define        BSTP_DEFAULT_BRIDGE_PRIORITY    0x8000
+#define        BSTP_DEFAULT_PORT_PRIORITY      0x80
+#define        BSTP_DEFAULT_PATH_COST          55
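+
+/*
+ * As in bridgestp.c, these timer values are in 256ths of a second, so the
+ * defaults above encode the 802.1D recommendations: a 20 second max age,
+ * a 2 second hello interval, and a 15 second forward delay.
+ */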
+
+/*
+ * Timeout (in seconds) for entries learned dynamically.
+ */
+#ifndef BRIDGE_RTABLE_TIMEOUT
+#define        BRIDGE_RTABLE_TIMEOUT           (20 * 60)       /* same as ARP */
+#endif
+
+/*
+ * Number of seconds between walks of the route list.
+ */
+#ifndef BRIDGE_RTABLE_PRUNE_PERIOD
+#define        BRIDGE_RTABLE_PRUNE_PERIOD      (5 * 60)
+#endif
+
+/*
+ * List of capabilities to mask on the member interface.
+ */
+#define        BRIDGE_IFCAPS_MASK      \
+       (IFCAP_CSUM_IPv4_Tx |   \
+       IFCAP_CSUM_TCPv4_Tx |   \
+       IFCAP_CSUM_UDPv4_Tx |   \
+       IFCAP_CSUM_TCPv6_Tx |   \
+       IFCAP_CSUM_UDPv6_Tx)
+
+
+int    bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
+
+static zone_t bridge_rtnode_pool = NULL;
+
+static errno_t 
+bridge_iff_input(void* cookie, ifnet_t ifp, __unused protocol_family_t protocol,
+                 mbuf_t *data, char **frame_ptr);
+static void 
+bridge_iff_event(void* cookie, ifnet_t ifp, __unused protocol_family_t protocol,
+                 const struct kev_msg *event_msg);
+static void 
+bridge_iff_detached(void* cookie, __unused ifnet_t interface);
+
+static uint32_t
+bridge_rthash(__unused struct bridge_softc *sc, const uint8_t *addr);
+
+static int     bridge_clone_create(struct if_clone *, int);
+static void    bridge_clone_destroy(struct ifnet *);
+
+static errno_t bridge_ioctl(ifnet_t ifp, unsigned long cmd, void *data);
+#if HAS_IF_CAP
+static void    bridge_mutecaps(struct bridge_iflist *, int);
+#endif
+static int     bridge_init(struct ifnet *);
+static void    bridge_stop(struct ifnet *, int);
+
+#if BRIDGE_MEMBER_OUT_FILTER
+static errno_t
+bridge_iff_output(void *cookie, ifnet_t ifp, protocol_family_t protocol, mbuf_t *data);
+static int bridge_output(struct bridge_softc *sc, ifnet_t ifp, mbuf_t m);
+#endif /* BRIDGE_MEMBER_OUT_FILTER */
+
+static errno_t bridge_start(struct ifnet *, mbuf_t);
+static errno_t bridge_set_bpf_tap(ifnet_t ifn, bpf_tap_mode mode, bpf_packet_func bpf_callback);
+__private_extern__ errno_t bridge_bpf_input(ifnet_t ifp, struct mbuf *m);
+__private_extern__ errno_t bridge_bpf_output(ifnet_t ifp, struct mbuf *m);
+
+static void bridge_detach(ifnet_t ifp);
+
+static errno_t bridge_input(struct bridge_iflist *, struct ifnet *, struct mbuf *, void *frame_header);
+
+static void    bridge_forward(struct bridge_softc *, struct mbuf *m);
+
+static void    bridge_timer(void *);
+
+static void    bridge_broadcast(struct bridge_softc *, struct ifnet *,
+                             struct mbuf *, int);
+
+static int     bridge_rtupdate(struct bridge_softc *, const uint8_t *,
+                            struct ifnet *, int, uint8_t);
+static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *);
+static void    bridge_rttrim(struct bridge_softc *);
+static void    bridge_rtage(struct bridge_softc *);
+static void    bridge_rtflush(struct bridge_softc *, int);
+/* APPLE MODIFICATION <cbz@apple.com> - add support for Proxy STA */
+#if IEEE80211_PROXYSTA
+static void    bridge_rtdiscovery(struct bridge_softc *);
+static void    bridge_rtpurge(struct bridge_softc *, struct ifnet *);
+#endif
+static int     bridge_rtdaddr(struct bridge_softc *, const uint8_t *);
+
+static int     bridge_rtable_init(struct bridge_softc *);
+static void    bridge_rtable_fini(struct bridge_softc *);
+
+static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
+                                                  const uint8_t *);
+static int     bridge_rtnode_insert(struct bridge_softc *,
+                                 struct bridge_rtnode *);
+static void    bridge_rtnode_destroy(struct bridge_softc *,
+                                  struct bridge_rtnode *);
+
+static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
+                                                  const char *name);
+static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
+                                                     struct ifnet *ifp);
+static void    bridge_delete_member(struct bridge_softc *,
+                                 struct bridge_iflist *);
+
+static void    bridge_ifdetach(struct bridge_iflist *bif, struct ifnet *ifp);
+
+
+static int     bridge_ioctl_add(struct bridge_softc *, void *);
+static int     bridge_ioctl_del(struct bridge_softc *, void *);
+/* APPLE MODIFICATION <cbz@apple.com> - add support for Proxy STA */
+#if IEEE80211_PROXYSTA
+static int bridge_ioctl_purge(struct bridge_softc *sc, void *arg);
+#endif
+static int     bridge_ioctl_gifflags(struct bridge_softc *, void *);
+static int     bridge_ioctl_sifflags(struct bridge_softc *, void *);
+static int     bridge_ioctl_scache(struct bridge_softc *, void *);
+static int     bridge_ioctl_gcache(struct bridge_softc *, void *);
+static int     bridge_ioctl_gifs32(struct bridge_softc *, void *);
+static int     bridge_ioctl_gifs64(struct bridge_softc *, void *);
+static int     bridge_ioctl_rts32(struct bridge_softc *, void *);
+static int     bridge_ioctl_rts64(struct bridge_softc *, void *);
+static int     bridge_ioctl_saddr32(struct bridge_softc *, void *);
+static int     bridge_ioctl_saddr64(struct bridge_softc *, void *);
+static int     bridge_ioctl_sto(struct bridge_softc *, void *);
+static int     bridge_ioctl_gto(struct bridge_softc *, void *);
+static int     bridge_ioctl_daddr32(struct bridge_softc *, void *);
+static int     bridge_ioctl_daddr64(struct bridge_softc *, void *);
+static int     bridge_ioctl_flush(struct bridge_softc *, void *);
+static int     bridge_ioctl_gpri(struct bridge_softc *, void *);
+static int     bridge_ioctl_spri(struct bridge_softc *, void *);
+static int     bridge_ioctl_ght(struct bridge_softc *, void *);
+static int     bridge_ioctl_sht(struct bridge_softc *, void *);
+static int     bridge_ioctl_gfd(struct bridge_softc *, void *);
+static int     bridge_ioctl_sfd(struct bridge_softc *, void *);
+static int     bridge_ioctl_gma(struct bridge_softc *, void *);
+static int     bridge_ioctl_sma(struct bridge_softc *, void *);
+static int     bridge_ioctl_sifprio(struct bridge_softc *, void *);
+static int     bridge_ioctl_sifcost(struct bridge_softc *, void *);
+
+struct bridge_control {
+       int                             (*bc_func)(struct bridge_softc *, void *);
+       unsigned int    bc_argsize;
+       unsigned int    bc_flags;
+};
+
+#define        BC_F_COPYIN             0x01    /* copy arguments in */
+#define        BC_F_COPYOUT            0x02    /* copy arguments out */
+#define        BC_F_SUSER              0x04    /* do super-user check */
+
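+/*
+ * Illustrative only: requests reach the bc_func handlers below through
+ * the SIOC[SG]DRVSPEC ioctls, with ifd_cmd used as an index into the
+ * table.  A minimal user-space sketch (assuming an AF_INET datagram
+ * socket `s` and an existing "bridge0"):
+ *
+ *	struct ifbreq req;
+ *	struct ifdrv ifd;
+ *
+ *	memset(&req, 0, sizeof(req));
+ *	strlcpy(req.ifbr_ifsname, "en0", sizeof(req.ifbr_ifsname));
+ *	memset(&ifd, 0, sizeof(ifd));
+ *	strlcpy(ifd.ifd_name, "bridge0", sizeof(ifd.ifd_name));
+ *	ifd.ifd_cmd = 0;		// index of the bridge_ioctl_add entry
+ *	ifd.ifd_len = sizeof(req);
+ *	ifd.ifd_data = &req;
+ *	ioctl(s, SIOCSDRVSPEC, &ifd);
+ */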
+static const struct bridge_control bridge_control_table32[] = {
+       { bridge_ioctl_add,             sizeof(struct ifbreq),
+               BC_F_COPYIN|BC_F_SUSER },
+       { bridge_ioctl_del,             sizeof(struct ifbreq),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_gifflags,        sizeof(struct ifbreq),
+               BC_F_COPYIN|BC_F_COPYOUT },
+       { bridge_ioctl_sifflags,        sizeof(struct ifbreq),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_scache,          sizeof(struct ifbrparam),
+               BC_F_COPYIN|BC_F_SUSER },
+       { bridge_ioctl_gcache,          sizeof(struct ifbrparam),
+               BC_F_COPYOUT },
+       
+       { bridge_ioctl_gifs32,          sizeof(struct ifbifconf32),
+               BC_F_COPYIN|BC_F_COPYOUT },
+       { bridge_ioctl_rts32,           sizeof(struct ifbaconf32),
+               BC_F_COPYIN|BC_F_COPYOUT },
+       
+       { bridge_ioctl_saddr32,         sizeof(struct ifbareq32),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_sto,             sizeof(struct ifbrparam),
+               BC_F_COPYIN|BC_F_SUSER },
+       { bridge_ioctl_gto,             sizeof(struct ifbrparam),
+               BC_F_COPYOUT },
+       
+       { bridge_ioctl_daddr32,         sizeof(struct ifbareq32),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_flush,           sizeof(struct ifbreq),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_gpri,            sizeof(struct ifbrparam),
+               BC_F_COPYOUT },
+       { bridge_ioctl_spri,            sizeof(struct ifbrparam),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_ght,             sizeof(struct ifbrparam),
+               BC_F_COPYOUT },
+       { bridge_ioctl_sht,             sizeof(struct ifbrparam),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_gfd,             sizeof(struct ifbrparam),
+               BC_F_COPYOUT },
+       { bridge_ioctl_sfd,             sizeof(struct ifbrparam),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_gma,             sizeof(struct ifbrparam),
+               BC_F_COPYOUT },
+       { bridge_ioctl_sma,             sizeof(struct ifbrparam),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_sifprio,         sizeof(struct ifbreq),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_sifcost,         sizeof(struct ifbreq),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       /* APPLE MODIFICATION <cbz@apple.com> - add support for Proxy STA */
+#if IEEE80211_PROXYSTA
+       { bridge_ioctl_purge,   sizeof(struct ifbreq),
+               BC_F_COPYIN|BC_F_SUSER },
+#endif
+};
+
+static const struct bridge_control bridge_control_table64[] = {
+       { bridge_ioctl_add,             sizeof(struct ifbreq),
+               BC_F_COPYIN|BC_F_SUSER },
+       { bridge_ioctl_del,             sizeof(struct ifbreq),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_gifflags,        sizeof(struct ifbreq),
+               BC_F_COPYIN|BC_F_COPYOUT },
+       { bridge_ioctl_sifflags,        sizeof(struct ifbreq),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_scache,          sizeof(struct ifbrparam),
+               BC_F_COPYIN|BC_F_SUSER },
+       { bridge_ioctl_gcache,          sizeof(struct ifbrparam),
+               BC_F_COPYOUT },
+       
+       { bridge_ioctl_gifs64,          sizeof(struct ifbifconf64),
+               BC_F_COPYIN|BC_F_COPYOUT },
+       { bridge_ioctl_rts64,           sizeof(struct ifbaconf64),
+               BC_F_COPYIN|BC_F_COPYOUT },
+       
+       { bridge_ioctl_saddr64,         sizeof(struct ifbareq64),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_sto,             sizeof(struct ifbrparam),
+               BC_F_COPYIN|BC_F_SUSER },
+       { bridge_ioctl_gto,             sizeof(struct ifbrparam),
+               BC_F_COPYOUT },
+       
+       { bridge_ioctl_daddr64,         sizeof(struct ifbareq64),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_flush,           sizeof(struct ifbreq),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_gpri,            sizeof(struct ifbrparam),
+               BC_F_COPYOUT },
+       { bridge_ioctl_spri,            sizeof(struct ifbrparam),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_ght,             sizeof(struct ifbrparam),
+               BC_F_COPYOUT },
+       { bridge_ioctl_sht,             sizeof(struct ifbrparam),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_gfd,             sizeof(struct ifbrparam),
+               BC_F_COPYOUT },
+       { bridge_ioctl_sfd,             sizeof(struct ifbrparam),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_gma,             sizeof(struct ifbrparam),
+               BC_F_COPYOUT },
+       { bridge_ioctl_sma,             sizeof(struct ifbrparam),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_sifprio,         sizeof(struct ifbreq),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       { bridge_ioctl_sifcost,         sizeof(struct ifbreq),
+               BC_F_COPYIN|BC_F_SUSER },
+       
+       /* APPLE MODIFICATION <cbz@apple.com> - add support for Proxy STA */
+#if IEEE80211_PROXYSTA
+       { bridge_ioctl_purge,   sizeof(struct ifbreq),
+               BC_F_COPYIN|BC_F_SUSER },
+#endif
+};
+
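+/*
+ * Note: bridge_control_table32 and bridge_control_table64 must stay in
+ * lockstep; ifd_cmd is a single index applied to whichever table the
+ * ioctl width selects, so the common size is taken from the 32-bit
+ * table and assumed to match the 64-bit one.
+ */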
+static const unsigned int bridge_control_table_size =
+    sizeof(bridge_control_table32) / sizeof(bridge_control_table32[0]);
+
+static LIST_HEAD(, bridge_softc) bridge_list = LIST_HEAD_INITIALIZER(bridge_list);
+
+static lck_grp_t *bridge_lock_grp = NULL;
+static lck_attr_t *bridge_lock_attr = NULL;
+
+static lck_rw_t *bridge_list_lock = NULL;
+
+
+static struct if_clone bridge_cloner =
+	IF_CLONE_INITIALIZER("bridge",
+			     bridge_clone_create,
+			     bridge_clone_destroy,
+			     0,
+			     IF_MAXUNIT);
+
+#if BRIDGE_DEBUG
+
+SYSCTL_DECL(_net_link);
+
+SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "Bridge");
+
+__private_extern__ int _if_brige_debug = 0;
+
+SYSCTL_INT(_net_link_bridge, OID_AUTO, debug, CTLFLAG_RW,
+           &_if_brige_debug, 0, "Bridge debug");
+
+static void printf_ether_header(struct ether_header *eh);
+static void printf_mbuf_data(mbuf_t m, size_t offset, size_t len);
+static void printf_mbuf_pkthdr(mbuf_t m, const char *prefix, const char *suffix);
+static void printf_mbuf(mbuf_t m, const char *prefix, const char *suffix);
+static void link_print(struct sockaddr_dl * dl_p);
+
+void
+printf_mbuf_pkthdr(mbuf_t m, const char *prefix, const char *suffix)
+{
+       if (m)
+               printf("%spktlen: %u rcvif: %p header: %p nextpkt: %p%s",
+                          prefix ? prefix : "",
+                          (unsigned int)mbuf_pkthdr_len(m), mbuf_pkthdr_rcvif(m), mbuf_pkthdr_header(m), mbuf_nextpkt(m),
+                          suffix ? suffix : "");
+	else
+		printf("%s<NULL>%s\n", prefix ? prefix : "", suffix ? suffix : "");
+}
+
+void
+printf_mbuf(mbuf_t m, const char *prefix, const char *suffix)
+{
+       if (m) {
+               printf("%s%p type: %u flags: 0x%x len: %u data: %p maxlen: %u datastart: %p next: %p%s",
+                          prefix ? prefix : "",
+                          m, mbuf_type(m), mbuf_flags(m), (unsigned int)mbuf_len(m), mbuf_data(m), 
+                          (unsigned int)mbuf_maxlen(m), mbuf_datastart(m), mbuf_next(m), 
+                          !suffix || (mbuf_flags(m) & MBUF_PKTHDR) ? "" : suffix);
+               if ((mbuf_flags(m) & MBUF_PKTHDR))
+                       printf_mbuf_pkthdr(m, " ", suffix);
+	} else
+		printf("%s<NULL>%s\n", prefix ? prefix : "", suffix ? suffix : "");
+}
+
+void
+printf_mbuf_data(mbuf_t m, size_t offset, size_t len)
+{
+       mbuf_t                  n;
+       size_t                  i, j;
+       size_t                  pktlen, mlen, maxlen;
+       unsigned char   *ptr;
+       
+       pktlen = mbuf_pkthdr_len(m);
+       
+       if (offset > pktlen)
+               return;
+       
+	/* print at most len bytes starting at offset, capped at the packet length */
+	maxlen = (pktlen - offset > len) ? offset + len : pktlen;
+       n = m;
+       mlen = mbuf_len(n);
+       ptr = mbuf_data(n);
+       for (i = 0, j = 0; i < maxlen; i++, j++) {
+               if (j >= mlen) {
+                       n = mbuf_next(n);
+                       if (n == 0)
+                               break;
+                       ptr = mbuf_data(n);
+                       mlen = mbuf_len(n);
+                       j = 0;
+               }
+               if (i >= offset) {
+                       printf("%02x%s", ptr[j], i % 2 ? " " : "");
+               }
+       }
+       return;
+}
+
+static void
+printf_ether_header(struct ether_header *eh)
+{
+       printf("%02x:%02x:%02x:%02x:%02x:%02x > %02x:%02x:%02x:%02x:%02x:%02x 0x%04x ", 
+                  eh->ether_shost[0], eh->ether_shost[1], eh->ether_shost[2], 
+                  eh->ether_shost[3], eh->ether_shost[4], eh->ether_shost[5], 
+                  eh->ether_dhost[0], eh->ether_dhost[1], eh->ether_dhost[2], 
+                  eh->ether_dhost[3], eh->ether_dhost[4], eh->ether_dhost[5], 
+                  eh->ether_type);
+}
+#endif /* BRIDGE_DEBUG */
+
+/*
+ * bridgeattach:
+ *
+ *     Pseudo-device attach routine.
+ */
+__private_extern__ int
+bridgeattach(__unused int n)
+{
+       int error;
+       lck_grp_attr_t *lck_grp_attr = NULL;
+       
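+	/*
+	 * Back route-node allocations with a zone; the second zinit
+	 * argument is the zone's maximum size in bytes, here enough
+	 * for 1024 bridge_rtnode entries.
+	 */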
+       bridge_rtnode_pool = zinit(sizeof(struct bridge_rtnode), 1024 * sizeof(struct bridge_rtnode),
+                               0, "bridge_rtnode");
+       
+       lck_grp_attr = lck_grp_attr_alloc_init();
+       
+       bridge_lock_grp = lck_grp_alloc_init("if_bridge", lck_grp_attr);
+       
+       bridge_lock_attr = lck_attr_alloc_init();
+       
+#if BRIDGE_DEBUG
+       lck_attr_setdebug(bridge_lock_attr);
+#endif
+
+       bridge_list_lock = lck_rw_alloc_init(bridge_lock_grp, bridge_lock_attr);
+       
+	// the group attributes can be freed once the lock group is allocated
+       lck_grp_attr_free(lck_grp_attr);
+       
+       LIST_INIT(&bridge_list);
+       error = if_clone_attach(&bridge_cloner);
+
+       return error;
+}
+
+#if BRIDGE_DEBUG
+
+static void
+link_print(struct sockaddr_dl * dl_p)
+{
+       int i;
+       
+#if 1
+       printf("sdl len %d index %d family %d type 0x%x nlen %d alen %d"
+           " slen %d addr ", dl_p->sdl_len,
+           dl_p->sdl_index,  dl_p->sdl_family, dl_p->sdl_type,
+           dl_p->sdl_nlen, dl_p->sdl_alen, dl_p->sdl_slen);
+#endif
+       for (i = 0; i < dl_p->sdl_alen; i++)
+        printf("%s%x", i ? ":" : "",
+               (CONST_LLADDR(dl_p))[i]);
+       printf("\n");
+       return;
+}
+#endif /* BRIDGE_DEBUG */
+
+
+/*
+ * bridge_clone_create:
+ *
+ *     Create a new bridge instance.
+ */
+/* APPLE MODIFICATION <cbz@apple.com> - add an opaque <const caddr_t params>
+ argument for cloning.  This was done for net80211's VAP creation (with the
+ Marvell codebase) and could prove useful for other devices, too.  It is not
+ under an #ifdef because carrying the extra parameter is harmless. */
+static int
+bridge_clone_create(struct if_clone *ifc, int unit)
+{
+       struct bridge_softc *sc = NULL;
+       struct ifnet *ifp = NULL;
+       u_char eaddr[6];
+       uint32_t r;
+       struct ifnet_init_params init_params;
+       errno_t error = 0;
+       uint32_t sdl_buffer[offsetof(struct sockaddr_dl, sdl_data) + IFNAMSIZ + ETHER_ADDR_LEN];
+       struct sockaddr_dl *sdl = (struct sockaddr_dl *)sdl_buffer;
+       
+       sc = _MALLOC(sizeof(*sc), M_DEVBUF, M_WAITOK);
+       memset(sc, 0, sizeof(*sc));
+       
+       sc->sc_brtmax = BRIDGE_RTABLE_MAX;
+       /* APPLE MODIFICATION <cbz@apple.com> - add support for Proxy STA */
+#if IEEE80211_PROXYSTA
+       sc->sc_brtmax_proxysta = BRIDGE_RTABLE_MAX_PROXYSTA;
+#endif
+       sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
+       sc->sc_bridge_max_age = BSTP_DEFAULT_MAX_AGE;
+       sc->sc_bridge_hello_time = BSTP_DEFAULT_HELLO_TIME;
+       sc->sc_bridge_forward_delay = BSTP_DEFAULT_FORWARD_DELAY;
+       sc->sc_bridge_priority = BSTP_DEFAULT_BRIDGE_PRIORITY;
+       sc->sc_hold_time = BSTP_DEFAULT_HOLD_TIME;
+       sc->sc_filter_flags = IFBF_FILT_DEFAULT;
+#ifndef BRIDGE_IPF
+       /*
+        * For backwards compatibility with previous behaviour...
+        * Switch off filtering on the bridge itself if BRIDGE_IPF is
+        * not defined.
+        */
+       sc->sc_filter_flags &= ~IFBF_FILT_USEIPF;
+#endif
+       
+       /* Initialize our routing table. */
+       error = bridge_rtable_init(sc);
+       if (error != 0) {
+               printf("bridge_clone_create: bridge_rtable_init failed %d\n", error);
+               goto done;
+       }
+       
+       LIST_INIT(&sc->sc_iflist);
+
+       sc->sc_mtx = lck_mtx_alloc_init(bridge_lock_grp, bridge_lock_attr);
+       
+       /* use the interface name as the unique id for ifp recycle */
+       snprintf(sc->sc_if_xname, sizeof(sc->sc_if_xname), "%s%d",
+             ifc->ifc_name, unit);
+       memset(&init_params, 0, sizeof(struct ifnet_init_params));
+       init_params.uniqueid = sc->sc_if_xname;
+       init_params.uniqueid_len = strlen(sc->sc_if_xname);
+       init_params.name = ifc->ifc_name;
+       init_params.unit = unit;
+       init_params.family = IFNET_FAMILY_ETHERNET;
+       init_params.type = IFT_BRIDGE;
+       init_params.output = bridge_start;
+       init_params.demux = ether_demux;
+       init_params.add_proto = ether_add_proto;
+       init_params.del_proto = ether_del_proto;
+       init_params.check_multi = ether_check_multi;
+       init_params.framer = ether_frameout;
+       init_params.softc = sc;
+       init_params.ioctl = bridge_ioctl;
+       init_params.set_bpf_tap = bridge_set_bpf_tap;
+       init_params.detach = bridge_detach;
+       init_params.broadcast_addr = etherbroadcastaddr;
+       init_params.broadcast_len = ETHER_ADDR_LEN;
+       error = ifnet_allocate(&init_params, &ifp);
+       if (error != 0) {
+               printf("bridge_clone_create: ifnet_allocate failed %d\n", error);
+               goto done;
+       }
+       sc->sc_if = ifp;
+       
+       error = ifnet_set_mtu(ifp, ETHERMTU);
+       if (error != 0) {
+               printf("bridge_clone_create: ifnet_set_mtu failed %d\n", error);
+               goto done;
+       }
+       error = ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
+       if (error != 0) {
+               printf("bridge_clone_create: ifnet_set_addrlen failed %d\n", error);
+               goto done;
+       }
+	error = ifnet_set_baudrate(ifp, 10000000);	// XXX: this is what IONetworking does
+       if (error != 0) {
+               printf("bridge_clone_create: ifnet_set_baudrate failed %d\n", error);
+               goto done;
+       }
+       error = ifnet_set_hdrlen(ifp, ETHER_HDR_LEN);
+       if (error != 0) {
+               printf("bridge_clone_create: ifnet_set_hdrlen failed %d\n", error);
+               goto done;
+       }
+       error = ifnet_set_flags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_NOTRAILERS | IFF_MULTICAST, 
+                                                       0xffff);
+       if (error != 0) {
+               printf("bridge_clone_create: ifnet_set_flags failed %d\n", error);
+               goto done;
+       }
+       
+       /*
+        * Generate a random ethernet address and use the private AC:DE:48
+        * OUI code.
+        */
+       read_random(&r, sizeof(r));
+       eaddr[0] = 0xAC;
+       eaddr[1] = 0xDE;
+       eaddr[2] = 0x48;
+       eaddr[3] = (r >> 0)  & 0xffu;
+       eaddr[4] = (r >> 8)  & 0xffu;
+       eaddr[5] = (r >> 16) & 0xffu;
+       
+       memset(sdl, 0, sizeof(sdl_buffer));
+       sdl->sdl_family = AF_LINK;
+       sdl->sdl_nlen = strlen(sc->sc_if_xname);
+       sdl->sdl_alen = ETHER_ADDR_LEN;
+       sdl->sdl_len = offsetof(struct sockaddr_dl, sdl_data);
+       memcpy(sdl->sdl_data, sc->sc_if_xname, sdl->sdl_nlen);
+       memcpy(LLADDR(sdl), eaddr, ETHER_ADDR_LEN);
+       
+#if BRIDGE_DEBUG
+       link_print(sdl);
+#endif
+
+       error = ifnet_attach(ifp, NULL);
+       if (error != 0) {
+               printf("bridge_clone_create: ifnet_attach failed %d\n", error);
+               goto done;
+       }
+       
+       error = ifnet_set_lladdr_and_type(ifp, eaddr, ETHER_ADDR_LEN, IFT_ETHER);
+       if (error != 0) {
+               printf("bridge_clone_create: ifnet_set_lladdr_and_type failed %d\n", error);
+               goto done;
+       }
+       
+#if APPLE_BRIDGE_HWCKSUM_SUPPORT
+       /* 
+        * APPLE MODIFICATION - our bridge can support HW checksums 
+        * (useful if underlying interfaces support them) on TX,
+        * RX is not that interesting, since the stack just looks to
+        * see if the packet has been checksummed already (I think)
+        * but we might as well indicate we support it
+        */
+       ifp->if_capabilities =
+               IFCAP_CSUM_IPv4_Tx | IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_UDPv4_Tx |
+               IFCAP_CSUM_IPv4_Rx | IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_UDPv4_Rx ;
+#endif
+       
+       lck_rw_lock_exclusive(bridge_list_lock);
+       LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
+       lck_rw_done(bridge_list_lock);
+
+       /* attach as ethernet */
+       error = bpf_attach(ifp, DLT_EN10MB, sizeof(struct ether_header), NULL, NULL);
+       
+done:
+	if (error != 0) {
+		printf("bridge_clone_create failed error %d\n", error);
+		/* Cleanup TBD: state allocated above is currently leaked on this path */
+	}
+       
+       return error;
+}
+
+/*
+ * bridge_clone_destroy:
+ *
+ *     Destroy a bridge instance.
+ */
+static void
+bridge_clone_destroy(struct ifnet *ifp)
+{
+       struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
+       struct bridge_iflist *bif;
+       int error;
+       
+       lck_mtx_lock(sc->sc_mtx);
+       if ((sc->sc_flags & SCF_DETACHING)) {
+               lck_mtx_unlock(sc->sc_mtx);
+               return;
+       }
+       sc->sc_flags |= SCF_DETACHING;
+       
+       bridge_stop(ifp, 1);
+       
+       error = ifnet_set_flags(ifp, 0, IFF_UP);
+       if (error != 0) {
+               printf("bridge_clone_destroy: ifnet_set_flags failed %d\n", error);
+       }
+       
+       while ((bif = LIST_FIRST(&sc->sc_iflist)) != NULL)
+               bridge_delete_member(sc, bif);
+       
+       lck_mtx_unlock(sc->sc_mtx);
+       
+       error = ifnet_detach(ifp);
+       if (error != 0) {
+               printf("bridge_clone_destroy: ifnet_detach failed %d\n", error);
+               if ((sc = (struct bridge_softc *)ifnet_softc(ifp)) != NULL) {
+                       lck_mtx_lock(sc->sc_mtx);
+                       sc->sc_flags &= ~SCF_DETACHING;
+                       lck_mtx_unlock(sc->sc_mtx);
+               }
+       }
+       
+       return;
+}
+
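+/*
+ * DRVSPEC expands to the body shared by the 32-bit and 64-bit
+ * SIOC[SG]DRVSPEC cases in bridge_ioctl() below: it validates ifd_cmd
+ * against the control table, checks the ioctl direction against the
+ * BC_F_COPYOUT flag, performs the super-user check for BC_F_SUSER
+ * commands, copies the argument in, runs the handler under sc_mtx and
+ * copies the result back out.  It expects ifd, bridge_control_table,
+ * bc, args, sc, cmd and error to be in scope at the expansion site.
+ */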
+#define DRVSPEC \
+       if (ifd->ifd_cmd >= bridge_control_table_size) { \
+               error = EINVAL; \
+               break; \
+       } \
+       bc = &bridge_control_table[ifd->ifd_cmd]; \
+        \
+       if ((cmd & IOC_DIRMASK) == IOC_INOUT && \
+               (bc->bc_flags & BC_F_COPYOUT) == 0) { \
+               error = EINVAL; \
+               break; \
+       } \
+       else if (((cmd & IOC_DIRMASK) == IOC_IN) && \
+                        (bc->bc_flags & BC_F_COPYOUT) != 0) { \
+               error = EINVAL; \
+               break; \
+       } \
+        \
+       if (bc->bc_flags & BC_F_SUSER) { \
+               error = kauth_authorize_generic(kauth_cred_get(), KAUTH_GENERIC_ISSUSER); \
+               if (error) \
+                       break; \
+       } \
+        \
+       if (ifd->ifd_len != bc->bc_argsize || \
+               ifd->ifd_len > sizeof(args)) { \
+               error = EINVAL; \
+               break; \
+       } \
+        \
+       memset(&args, 0, sizeof(args)); \
+       if (bc->bc_flags & BC_F_COPYIN) { \
+               error = copyin(ifd->ifd_data, &args, ifd->ifd_len); \
+               if (error) \
+                       break; \
+       } \
+        \
+       lck_mtx_lock(sc->sc_mtx); \
+       error = (*bc->bc_func)(sc, &args); \
+       lck_mtx_unlock(sc->sc_mtx); \
+       if (error) \
+               break; \
+        \
+       if (bc->bc_flags & BC_F_COPYOUT) \
+               error = copyout(&args, ifd->ifd_data, ifd->ifd_len)
+
+/*
+ * bridge_ioctl:
+ *
+ *     Handle a control request from the operator.
+ */
+static errno_t
+bridge_ioctl(ifnet_t ifp, unsigned long cmd, void *data)
+{
+       struct bridge_softc *sc = ifnet_softc(ifp);
+       struct ifreq *ifr = (struct ifreq *) data;
+       int error = 0;
+       
+       lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_NOTOWNED);
+
+#if BRIDGE_DEBUG
+       printf("bridge_ioctl: ifp %p cmd 0x%08lx (%c%c [%lu] %c %lu)\n", 
+                  ifp, 
+                  cmd, 
+                  (cmd & IOC_IN) ? 'I' : ' ',
+                  (cmd & IOC_OUT) ? 'O' : ' ',
+                  IOCPARM_LEN(cmd),
+                  (char)IOCGROUP(cmd),
+                  cmd & 0xff);
+       printf("SIOCGDRVSPEC32 %lx SIOCGDRVSPEC64 %lx\n", SIOCGDRVSPEC32, SIOCGDRVSPEC64);
+#endif
+       
+       switch (cmd) {
+               case SIOCADDMULTI:
+                       break;
+               case SIOCDELMULTI:
+                       break;
+                       
+               case SIOCSDRVSPEC32:
+               case SIOCGDRVSPEC32: {
+                       union {
+                               struct ifbreq ifbreq;
+                               struct ifbifconf32 ifbifconf;
+                               struct ifbareq32 ifbareq;
+                               struct ifbaconf32 ifbaconf;
+                               struct ifbrparam ifbrparam;
+                       } args;
+                       struct ifdrv32 *ifd = (struct ifdrv32 *) data;
+                       const struct bridge_control *bridge_control_table = bridge_control_table32, *bc;
+                       
+                       DRVSPEC;
+                       
+                       break;
+               }
+               case SIOCSDRVSPEC64:
+               case SIOCGDRVSPEC64: {
+                       union {
+                               struct ifbreq ifbreq;
+                               struct ifbifconf64 ifbifconf;
+                               struct ifbareq64 ifbareq;
+                               struct ifbaconf64 ifbaconf;
+                               struct ifbrparam ifbrparam;
+                       } args;
+                       struct ifdrv64 *ifd = (struct ifdrv64 *) data;
+                       const struct bridge_control *bridge_control_table = bridge_control_table64, *bc;
+                       
+                       DRVSPEC;
+                       
+                       break;
+               }
+                       
+               case SIOCSIFFLAGS:
+                       if ((ifnet_flags(ifp) & (IFF_UP|IFF_RUNNING)) == IFF_RUNNING) {
+                               /*
+                                * If interface is marked down and it is running,
+                                * then stop and disable it.
+                                */
+                               lck_mtx_lock(sc->sc_mtx);
+                               bridge_stop(ifp, 1);
+                               lck_mtx_unlock(sc->sc_mtx);
+                       } else if ((ifnet_flags(ifp) & (IFF_UP|IFF_RUNNING)) == IFF_UP) {
+                               /*
+                                * If interface is marked up and it is stopped, then
+                                * start it.
+                                */
+                               lck_mtx_lock(sc->sc_mtx);
+                               error = bridge_init(ifp);
+                               lck_mtx_unlock(sc->sc_mtx);
+                       }
+                       break;
+                       
+               case SIOCSIFMTU:
+#if 0
+                       /* APPLE MODIFICATION <cbz@apple.com> 
+                        if we wanted to support changing the MTU */
+               {
+                       struct ifreq *ifr = (struct ifreq *)data;
+                       struct bridge_iflist *bif;
+                       struct ifnet *dst_if;
+                       sc->sc_if.if_mtu = ifr->ifr_mtu;
+                       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+                               dst_if = bif->bif_ifp;
+                               error = ifnet_ioctl(dst_if, 0, cmd, data);
+                               if (error)
+                                       break;
+                       }
+               }
+#else
+                       /* Do not allow the MTU to be changed on the bridge */
+                       error = EINVAL;
+#endif
+                       break;
+                       
+                       /* APPLE MODIFICATION - don't pass this down to ether_ioctl, just indicate we don't handle it */
+               case SIOCGIFMEDIA:
+                       error = EINVAL;
+                       break;
+                       
+               case SIOCSIFLLADDR:
+                       error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
+                       if (error != 0)
+                               printf("bridge_ioctl: ifnet_set_lladdr failed %d\n", error);
+                       break;
+                       
+               default:
+                       error = ether_ioctl(ifp, cmd, data);
+#if BRIDGE_DEBUG
+                       if (error != 0)
+                               printf("bridge_ioctl: ether_ioctl ifp %p cmd 0x%08lx (%c%c [%lu] %c %lu) failed error: %d\n", 
+                                          ifp, 
+                                          cmd, 
+                                          (cmd & IOC_IN) ? 'I' : ' ',
+                                          (cmd & IOC_OUT) ? 'O' : ' ',
+                                          IOCPARM_LEN(cmd),
+                                          (char) IOCGROUP(cmd),
+                                          cmd & 0xff,
+                                          error);
+#endif /* BRIDGE_DEBUG */
+                       break;
+       }
+       lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_NOTOWNED);
+       
+       return (error);
+}
+
+/*
+ * bridge_mutecaps:
+ *
+ *     Clear or restore unwanted capabilities on the member interface
+ */
+#if HAS_IF_CAP
+void
+bridge_mutecaps(struct bridge_iflist *bif, int mute)
+{
+       struct ifnet *ifp = bif->bif_ifp;
+       struct ifcapreq ifcr;
+       
+       if (ifp->if_ioctl == NULL)
+               return;
+       
+       memset(&ifcr, 0, sizeof(ifcr));
+       ifcr.ifcr_capenable = ifp->if_capenable;
+       
+       if (mute) {
+               /* mask off and save capabilities */
+               bif->bif_mutecap = ifcr.ifcr_capenable & BRIDGE_IFCAPS_MASK;
+               if (bif->bif_mutecap != 0)
+                       ifcr.ifcr_capenable &= ~BRIDGE_IFCAPS_MASK;
+       } else
+       /* restore muted capabilities */
+               ifcr.ifcr_capenable |= bif->bif_mutecap;
+       
+       if (bif->bif_mutecap != 0) {
+               (void) (*ifp->if_ioctl)(ifp, SIOCSIFCAP, (caddr_t)&ifcr);
+       }
+}
+#endif /* HAS_IF_CAP */
+
+/*
+ * bridge_lookup_member:
+ */
+static struct bridge_iflist *
+bridge_lookup_member(struct bridge_softc *sc, const char *name)
+{
+       struct bridge_iflist *bif;
+       struct ifnet *ifp;
+       char if_xname[IFNAMSIZ];
+       
+       lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_OWNED);
+
+       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+               ifp = bif->bif_ifp;
+               snprintf(if_xname, sizeof(if_xname), "%s%d", 
+                 ifnet_name(ifp), ifnet_unit(ifp));
+               if (strncmp(if_xname, name, sizeof(if_xname)) == 0)
+                       return (bif);
+       }
+       
+       return (NULL);
+}
+
+/*
+ * bridge_lookup_member_if:
+ */
+static struct bridge_iflist *
+bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
+{
+       struct bridge_iflist *bif;
+       
+       lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_OWNED);
+
+       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+               if (bif->bif_ifp == member_ifp)
+                       return (bif);
+       }
+       
+       return (NULL);
+}
+
+static errno_t 
+bridge_iff_input(void* cookie, ifnet_t ifp, __unused protocol_family_t protocol,
+                 mbuf_t *data, char **frame_ptr)
+{
+       errno_t error = 0;
+       struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
+       struct bridge_softc *sc = bif->bif_sc;
+       int included = 0;
+       size_t frmlen = 0;
+       mbuf_t m = *data;
+
+       if ((m->m_flags & M_PROTO1))
+               goto out;
+       
+       if (*frame_ptr >= (char *)mbuf_datastart(m) && *frame_ptr <= (char *)mbuf_data(m)) {
+               included = 1;
+               frmlen = (char *)mbuf_data(m) - *frame_ptr;
+       }
+#if BRIDGE_DEBUG
+       if (_if_brige_debug) {
+               printf("bridge_iff_input %s%d from %s%d m %p data %p frame %p %s frmlen %lu\n", 
+                          ifnet_name(sc->sc_if), ifnet_unit(sc->sc_if),
+                          ifnet_name(ifp), ifnet_unit(ifp), 
+                          m, mbuf_data(m), *frame_ptr, included ? "inside" : "outside", frmlen);
+               
+               if (_if_brige_debug > 1) {
+                       printf_mbuf(m, "bridge_iff_input[", "\n");
+                       printf_ether_header((struct ether_header *)*frame_ptr);
+                       printf_mbuf_data(m, 0, 20);
+                       printf("\n");
+               }
+       }
+#endif /* BRIDGE_DEBUG */
+
+	/* Move the data pointer back to the start of the frame, i.e. the link-layer header */
+       if (included) {
+               (void) mbuf_setdata(m, (char *)mbuf_data(m) - frmlen, mbuf_len(m) + frmlen);
+               (void) mbuf_pkthdr_adjustlen(m, frmlen);
+       } else {
+               printf("bridge_iff_input: frame_ptr outside mbuf\n");
+               goto out;
+       }
+       
+       error = bridge_input(bif, ifp, m, *frame_ptr);
+       
+       /* Adjust packet back to original */
+       if (error == 0) {
+               (void) mbuf_setdata(m, (char *)mbuf_data(m) + frmlen, mbuf_len(m) - frmlen);
+               (void) mbuf_pkthdr_adjustlen(m, -frmlen);
+       }
+#if BRIDGE_DEBUG
+       if (_if_brige_debug > 1) {
+               printf("\n");
+               printf_mbuf(m, "bridge_iff_input]", "\n");
+       }
+#endif /* BRIDGE_DEBUG */
+
+out:
+       lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_NOTOWNED);
+       
+       return error;
+}
+
+
+#if BRIDGE_MEMBER_OUT_FILTER
+static errno_t
+bridge_iff_output(void *cookie, ifnet_t ifp, __unused protocol_family_t protocol, mbuf_t *data)
+{
+       errno_t error = 0;
+       struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
+       struct bridge_softc *sc = bif->bif_sc;
+       mbuf_t m = *data;
+       
+       if ((m->m_flags & M_PROTO1))
+               goto out;
+       
+#if BRIDGE_DEBUG
+       if (_if_brige_debug) {
+               printf("bridge_iff_output %s%d from %s%d m %p data %p\n", 
+                               ifnet_name(sc->sc_if), ifnet_unit(sc->sc_if),
+                               ifnet_name(ifp), ifnet_unit(ifp), 
+                               m, mbuf_data(m));
+       }
+#endif /* BRIDGE_DEBUG */
+
+       error = bridge_output(sc, ifp, m);
+       if (error != 0) {
+               printf("bridge_iff_output: bridge_output failed error %d\n", error);
+       }
+
+out:   
+       lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_NOTOWNED);
+
+       return error;
+}
+#endif /* BRIDGE_MEMBER_OUT_FILTER */
+
+
+static void 
+bridge_iff_event(void* cookie, ifnet_t ifp, __unused protocol_family_t protocol,
+                 const struct kev_msg *event_msg)
+{
+       struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
+       
+       if (event_msg->vendor_code == KEV_VENDOR_APPLE && 
+               event_msg->kev_class == KEV_NETWORK_CLASS &&
+               event_msg->kev_subclass == KEV_DL_SUBCLASS) {
+               switch (event_msg->event_code) {
+                       case KEV_DL_IF_DETACHING:
+                               bridge_ifdetach(bif, ifp);
+                               break;
+                               
+                       default:
+                               break;
+               }
+       }               
+}
+
+static void 
+bridge_iff_detached(void* cookie, __unused ifnet_t interface)
+{
+       struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
+       
+       _FREE(bif, M_DEVBUF);
+       
+       return;
+}
+
+/*
+ * bridge_delete_member:
+ *
+ *     Delete the specified member interface.
+ */
+static void
+bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif)
+{
+       struct ifnet *ifs = bif->bif_ifp;
+       
+       switch (ifnet_type(ifs)) {
+        case IFT_ETHER:
+            /*
+             * Take the interface out of promiscuous mode.
+             */
+            (void) ifnet_set_promiscuous(ifs, 0);
+            break;
+#if NGIF > 0
+        case IFT_GIF:
+            break;
+#endif
+        default:
+#ifdef DIAGNOSTIC
+            panic("bridge_delete_member: impossible");
+#endif
+            break;
+       }
+
+       ifs->if_bridge = NULL;
+       LIST_REMOVE(bif, bif_next);
+
+       /* Respect lock ordering with DLIL lock */
+       lck_mtx_unlock(sc->sc_mtx);
+       iflt_detach(bif->bif_iff_ref);
+       lck_mtx_lock(sc->sc_mtx);
+       
+       bridge_rtdelete(sc, ifs, IFBF_FLUSHALL);
+       
+       if (ifnet_flags(sc->sc_if) & IFF_RUNNING)
+               bstp_initialization(sc);
+       
+	/* When the last member interface is deleted, revert the MTU */
+       
+       if (LIST_EMPTY(&sc->sc_iflist))
+               (void) ifnet_set_mtu(sc->sc_if, ETHERMTU);
+}
+
+static int
+bridge_ioctl_add(struct bridge_softc *sc, void *arg)
+{
+       struct ifbreq *req = arg;
+       struct bridge_iflist *bif = NULL;
+       struct ifnet *ifs;
+       int error = 0;
+       /* APPLE MODIFICATION <cbz@apple.com> - is this a proxy sta being added? */
+#if IEEE80211_PROXYSTA
+       struct bridge_rtnode *brt;
+#endif
+       
+       error = ifnet_find_by_name(req->ifbr_ifsname, &ifs);
+       if (error || ifs == NULL)
+               return (ENOENT);
+       
+	/* Is the interface already attached to this bridge? */
+       if (ifs->if_bridge == sc)
+               return (EEXIST);
+       
+       if (ifs->if_bridge != NULL)
+               return (EBUSY);
+       
+       /* First added interface resets the MTU */
+       
+       if (LIST_EMPTY(&sc->sc_iflist))
+               (void) ifnet_set_mtu(sc->sc_if, ETHERMTU);
+       
+       if (ifnet_mtu(sc->sc_if) != ifnet_mtu(ifs))
+               return (EINVAL);
+
+       bif = _MALLOC(sizeof(*bif), M_DEVBUF, M_WAITOK|M_ZERO);
+       if (bif == NULL)
+               return (ENOMEM);
+       
+       bif->bif_ifp = ifs;
+       bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
+       bif->bif_priority = BSTP_DEFAULT_PORT_PRIORITY;
+       bif->bif_path_cost = BSTP_DEFAULT_PATH_COST;
+       bif->bif_sc = sc;
+       
+       switch (ifnet_type(ifs)) {
+        case IFT_ETHER:
+            /*
+             * Place the interface into promiscuous mode.
+             */
+            error = ifnet_set_promiscuous(ifs, 1);
+            if (error)
+                goto out;
+#if HAS_IF_CAP            
+            bridge_mutecaps(bif, 1);
+#endif
+            break;
+#if NGIF > 0
+            case IFT_GIF:
+            break;
+#endif
+            default:
+            error = EINVAL;
+            goto out;
+       }
+       
+       /*
+        * If the LINK0 flag is set, and this is the first member interface,
+        * attempt to inherit its link-layer address.
+        */
+       if ((ifnet_flags(sc->sc_if) & IFF_LINK0) && LIST_EMPTY(&sc->sc_iflist) &&
+           ifnet_type(ifs) == IFT_ETHER) {
+           (void) ifnet_set_lladdr(sc->sc_if, ifnet_lladdr(ifs),
+                                                       ETHER_ADDR_LEN);
+       }
+       
+       // install an interface filter
+       {
+               struct iff_filter iff;
+               
+               memset(&iff, 0, sizeof(struct iff_filter));
+               
+               iff.iff_cookie = bif;
+               iff.iff_name = "com.apple.kernel.bsd.net.if_bridge";
+               iff.iff_input = bridge_iff_input;
+#if BRIDGE_MEMBER_OUT_FILTER
+               iff.iff_output = bridge_iff_output;
+#endif /* BRIDGE_MEMBER_OUT_FILTER */
+               iff.iff_event = bridge_iff_event;
+               iff.iff_detached = bridge_iff_detached;
+               
+               /* Respect lock ordering with DLIL lock */
+               lck_mtx_unlock(sc->sc_mtx);
+               error = iflt_attach(ifs, &iff, &bif->bif_iff_ref);
+               lck_mtx_lock(sc->sc_mtx);
+               if (error != 0) {
+                       printf("bridge_ioctl_add: iflt_attach failed %d\n", error);
+                       goto out;
+               }
+       }
+       ifs->if_bridge = sc;
+       LIST_INSERT_HEAD(&sc->sc_iflist, bif, bif_next);
+       
+       
+       if (ifnet_flags(sc->sc_if) & IFF_RUNNING)
+               bstp_initialization(sc);
+       else
+               bstp_stop(sc);
+       
+       /* APPLE MODIFICATION <cbz@apple.com> - is this a proxy sta being added? */
+#if IEEE80211_PROXYSTA
+       brt = bridge_rtnode_lookup(sc, ifnet_lladdr(ifs));
+       if (brt) {
+#if DIAGNOSTIC
+               printf( "%s: attach %s to bridge as proxysta for %02x:%02x:%02x:%02x:%02x:%02x discovered on %s\n",
+               __func__, ifs->if_xname, brt->brt_addr[0], brt->brt_addr[1], brt->brt_addr[2], 
+               brt->brt_addr[3], brt->brt_addr[4], brt->brt_addr[5], brt->brt_ifp->if_xname );
+#endif
+               brt->brt_ifp_proxysta = ifs;
+       }
+#endif
+       
+       
+out:
+       if (error) {
+               if (bif != NULL)
+                       _FREE(bif, M_DEVBUF);
+       }
+       return (error);
+}
+
+static int
+bridge_ioctl_del(struct bridge_softc *sc, void *arg)
+{
+       struct ifbreq *req = arg;
+       struct bridge_iflist *bif;
+       
+       bif = bridge_lookup_member(sc, req->ifbr_ifsname);
+       if (bif == NULL)
+               return (ENOENT);
+       
+       bridge_delete_member(sc, bif);
+       
+       return (0);
+}
+
+/* APPLE MODIFICATION <cbz@apple.com> - add support for Proxy STA */
+#if IEEE80211_PROXYSTA
+static int
+bridge_ioctl_purge(struct bridge_softc *sc, void *arg)
+{
+       struct ifbreq *req = arg;
+       struct bridge_iflist *bif;
+       struct ifnet *ifs;
+       
+       bif = bridge_lookup_member(sc, req->ifbr_ifsname);
+       if (bif == NULL)
+               return (ENOENT);
+       
+       ifs = bif->bif_ifp;
+       bridge_rtpurge(sc, ifs);
+       
+       return (0);
+}
+#endif
+
+static int
+bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
+{
+       struct ifbreq *req = arg;
+       struct bridge_iflist *bif;
+       
+       bif = bridge_lookup_member(sc, req->ifbr_ifsname);
+       if (bif == NULL)
+               return (ENOENT);
+       
+       req->ifbr_ifsflags = bif->bif_flags;
+       req->ifbr_state = bif->bif_state;
+       req->ifbr_priority = bif->bif_priority;
+       req->ifbr_path_cost = bif->bif_path_cost;
+       req->ifbr_portno = ifnet_index(bif->bif_ifp) & 0xffff;
+       
+       return (0);
+}
+
+static int
+bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
+{
+       struct ifbreq *req = arg;
+       struct bridge_iflist *bif;
+       
+       bif = bridge_lookup_member(sc, req->ifbr_ifsname);
+       if (bif == NULL)
+               return (ENOENT);
+       
+       if (req->ifbr_ifsflags & IFBIF_STP) {
+               switch (ifnet_type(bif->bif_ifp)) {
+            case IFT_ETHER:
+                /* These can do spanning tree. */
+                break;
+                
+            default:
+                /* Nothing else can. */
+                return (EINVAL);
+               }
+       }
+       
+       /* APPLE MODIFICATION <cbz@apple.com> - add support for Proxy STA */
+#if IEEE80211_PROXYSTA
+       if ((bif->bif_flags & IFBIF_PROXYSTA_DISCOVER) && 
+           ((req->ifbr_ifsflags & IFBIF_PROXYSTA_DISCOVER) == 0))
+               bridge_rtpurge(sc, bif->bif_ifp);
+#endif
+       
+       bif->bif_flags = req->ifbr_ifsflags;
+       
+       if (ifnet_flags(sc->sc_if) & IFF_RUNNING)
+               bstp_initialization(sc);
+       
+       /* APPLE MODIFICATION <cbz@apple.com> - add support for Proxy STA */
+#if IEEE80211_PROXYSTA
+       if (bif->bif_flags & IFBIF_PROXYSTA_DISCOVER)
+               bridge_rtdiscovery(sc);
+#endif
+       
+       return (0);
+}
+
+static int
+bridge_ioctl_scache(struct bridge_softc *sc, void *arg)
+{
+       struct ifbrparam *param = arg;
+       
+       sc->sc_brtmax = param->ifbrp_csize;
+       bridge_rttrim(sc);
+       
+       return (0);
+}
+
+static int
+bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
+{
+       struct ifbrparam *param = arg;
+       
+       param->ifbrp_csize = sc->sc_brtmax;
+       
+       return (0);
+}
+
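+/*
+ * BRIDGE_IOCTL_GIFS implements the usual two-pass buffer negotiation:
+ * a caller passing ifbic_len == 0 is told how much space all members
+ * need, and a second call with a buffer gets one struct ifbreq copied
+ * out per member, with ifbic_len rewritten to the bytes actually used.
+ */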
+#define BRIDGE_IOCTL_GIFS \
+       struct bridge_iflist *bif; \
+       struct ifbreq breq; \
+       int count, error = 0; \
+       uint32_t len; \
+       \
+       count = 0; \
+	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) \
+		count++; \
+       \
+       if (bifc->ifbic_len == 0) { \
+               bifc->ifbic_len = sizeof(breq) * count; \
+               return (0); \
+       } \
+       \
+       count = 0; \
+       len = bifc->ifbic_len; \
+       memset(&breq, 0, sizeof breq); \
+       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { \
+               if (len < sizeof(breq)) \
+                       break; \
+       \
+               snprintf(breq.ifbr_ifsname, sizeof(breq.ifbr_ifsname), "%s%d", \
+                 ifnet_name(bif->bif_ifp), ifnet_unit(bif->bif_ifp)); \
+               breq.ifbr_ifsflags = bif->bif_flags; \
+               breq.ifbr_state = bif->bif_state; \
+               breq.ifbr_priority = bif->bif_priority; \
+               breq.ifbr_path_cost = bif->bif_path_cost; \
+               breq.ifbr_portno = ifnet_index(bif->bif_ifp) & 0xffff; \
+               error = copyout(&breq, bifc->ifbic_req + count * sizeof(breq), sizeof(breq)); \
+               if (error) \
+                       break; \
+               count++; \
+               len -= sizeof(breq); \
+       } \
+       \
+       bifc->ifbic_len = sizeof(breq) * count
+
+
+static int
+bridge_ioctl_gifs64(struct bridge_softc *sc, void *arg)
+{
+       struct ifbifconf64 *bifc = arg;
+       
+       BRIDGE_IOCTL_GIFS;
+
+       return (error);
+}
+
+static int
+bridge_ioctl_gifs32(struct bridge_softc *sc, void *arg)
+{
+       struct ifbifconf32 *bifc = arg;
+
+       BRIDGE_IOCTL_GIFS;
+
+       return (error);
+}
+
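+/*
+ * BRIDGE_IOCTL_RTS walks the forwarding table and copies out one
+ * address entry per node; dynamic entries report the seconds left
+ * until expiry, computed against nanouptime().
+ */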
+#define BRIDGE_IOCTL_RTS \
+       struct bridge_rtnode *brt; \
+       int count = 0, error = 0; \
+       uint32_t len; \
+       struct timespec now; \
+        \
+       if (bac->ifbac_len == 0) \
+               return (0); \
+        \
+       len = bac->ifbac_len; \
+       LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) { \
+               if (len < sizeof(bareq)) \
+                       goto out; \
+               memset(&bareq, 0, sizeof(bareq)); \
+               snprintf(bareq.ifba_ifsname, sizeof(bareq.ifba_ifsname), "%s%d", \
+                 ifnet_name(brt->brt_ifp), ifnet_unit(brt->brt_ifp)); \
+               memcpy(bareq.ifba_dst, brt->brt_addr, sizeof(brt->brt_addr)); \
+               if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) { \
+                       nanouptime(&now); \
+                       if (brt->brt_expire >= (unsigned long)now.tv_sec) \
+                               bareq.ifba_expire = brt->brt_expire - now.tv_sec; \
+                       else \
+                               bareq.ifba_expire = 0; \
+               } else \
+                       bareq.ifba_expire = 0; \
+               bareq.ifba_flags = brt->brt_flags; \
+                \
+               error = copyout(&bareq, bac->ifbac_req + count * sizeof(bareq), sizeof(bareq)); \
+               if (error) \
+                       goto out; \
+               count++; \
+               len -= sizeof(bareq); \
+       } \
+out: \
+       bac->ifbac_len = sizeof(bareq) * count
+       
+
+static int
+bridge_ioctl_rts64(struct bridge_softc *sc, void *arg)
+{
+       struct ifbaconf64 *bac = arg;
+       struct ifbareq64 bareq;
+       
+       BRIDGE_IOCTL_RTS;
+
+       return (error);
+}
+
+static int
+bridge_ioctl_rts32(struct bridge_softc *sc, void *arg)
+{
+       struct ifbaconf32 *bac = arg;
+       struct ifbareq32 bareq;
+       
+       BRIDGE_IOCTL_RTS;
+
+       return (error);
+}
+
+static int
+bridge_ioctl_saddr64(struct bridge_softc *sc, void *arg)
+{
+       struct ifbareq64 *req = arg;
+       struct bridge_iflist *bif;
+       int error;
+       
+       bif = bridge_lookup_member(sc, req->ifba_ifsname);
+       if (bif == NULL)
+               return (ENOENT);
+       
+       error = bridge_rtupdate(sc, req->ifba_dst, bif->bif_ifp, 1,
+                            req->ifba_flags);
+       
+       return (error);
+}
+
+static int
+bridge_ioctl_saddr32(struct bridge_softc *sc, void *arg)
+{
+       struct ifbareq32 *req = arg;
+       struct bridge_iflist *bif;
+       int error;
+       
+       bif = bridge_lookup_member(sc, req->ifba_ifsname);
+       if (bif == NULL)
+               return (ENOENT);
+       
+       error = bridge_rtupdate(sc, req->ifba_dst, bif->bif_ifp, 1,
+                            req->ifba_flags);
+       
+       return (error);
+}
+
+static int
+bridge_ioctl_sto(struct bridge_softc *sc, void *arg)
+{
+       struct ifbrparam *param = arg;
+       
+       sc->sc_brttimeout = param->ifbrp_ctime;
+       
+       return (0);
+}
+
+static int
+bridge_ioctl_gto(struct bridge_softc *sc, void *arg)
+{
+       struct ifbrparam *param = arg;
+       
+       param->ifbrp_ctime = sc->sc_brttimeout;
+       
+       return (0);
+}
+
+static int
+bridge_ioctl_daddr64(struct bridge_softc *sc, void *arg)
+{
+       struct ifbareq64 *req = arg;
+       
+       return (bridge_rtdaddr(sc, req->ifba_dst));
+}
+
+static int
+bridge_ioctl_daddr32(struct bridge_softc *sc, void *arg)
+{
+       struct ifbareq32 *req = arg;
+       
+       return (bridge_rtdaddr(sc, req->ifba_dst));
+}
+
+static int
+bridge_ioctl_flush(struct bridge_softc *sc, void *arg)
+{
+       struct ifbreq *req = arg;
+       
+       bridge_rtflush(sc, req->ifbr_ifsflags);
+       
+       return (0);
+}
+
+static int
+bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
+{
+       struct ifbrparam *param = arg;
+       
+       param->ifbrp_prio = sc->sc_bridge_priority;
+       
+       return (0);
+}
+
+static int
+bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
+{
+       struct ifbrparam *param = arg;
+       
+       sc->sc_bridge_priority = param->ifbrp_prio;
+       
+       if (ifnet_flags(sc->sc_if) & IFF_RUNNING)
+               bstp_initialization(sc);
+       
+       return (0);
+}
+
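+/*
+ * The STP timing parameters below (hello time, forward delay, max age)
+ * are kept in the softc as 8.8 fixed-point seconds, matching the BSTP
+ * code, so the get/set handlers convert with >> 8 and << 8; e.g. a
+ * 2-second hello time is stored as 2 << 8 == 512.
+ */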
+static int
+bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
+{
+       struct ifbrparam *param = arg;
+       
+       param->ifbrp_hellotime = sc->sc_bridge_hello_time >> 8;
+       
+       return (0);
+}
+
+static int
+bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
+{
+       struct ifbrparam *param = arg;
+       
+       if (param->ifbrp_hellotime == 0)
+               return (EINVAL);
+       sc->sc_bridge_hello_time = param->ifbrp_hellotime << 8;
+       
+       if (ifnet_flags(sc->sc_if) & IFF_RUNNING)
+               bstp_initialization(sc);
+       
+       return (0);
+}
+
+static int
+bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
+{
+       struct ifbrparam *param = arg;
+       
+       param->ifbrp_fwddelay = sc->sc_bridge_forward_delay >> 8;
+       
+       return (0);
+}
+
+static int
+bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
+{
+       struct ifbrparam *param = arg;
+       
+       if (param->ifbrp_fwddelay == 0)
+               return (EINVAL);
+       sc->sc_bridge_forward_delay = param->ifbrp_fwddelay << 8;
+       
+       if (ifnet_flags(sc->sc_if) & IFF_RUNNING)
+               bstp_initialization(sc);
+       
+       return (0);
+}
+
+static int
+bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
+{
+       struct ifbrparam *param = arg;
+       
+       param->ifbrp_maxage = sc->sc_bridge_max_age >> 8;
+       
+       return (0);
+}
+
+static int
+bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
+{
+       struct ifbrparam *param = arg;
+       
+       if (param->ifbrp_maxage == 0)
+               return (EINVAL);
+       sc->sc_bridge_max_age = param->ifbrp_maxage << 8;
+       
+       if (ifnet_flags(sc->sc_if) & IFF_RUNNING)
+               bstp_initialization(sc);
+       
+       return (0);
+}
+
+static int
+bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
+{
+       struct ifbreq *req = arg;
+       struct bridge_iflist *bif;
+       
+       bif = bridge_lookup_member(sc, req->ifbr_ifsname);
+       if (bif == NULL)
+               return (ENOENT);
+       
+       bif->bif_priority = req->ifbr_priority;
+       
+       if (ifnet_flags(sc->sc_if) & IFF_RUNNING)
+               bstp_initialization(sc);
+       
+       return (0);
+}
+
+/* APPLE MODIFICATION <cbz@apple.com> - add support for Proxy STA */
+#if IEEE80211_PROXYSTA
+static void
+bridge_proxysta_notify_macaddr(struct ifnet *ifp, int op, const uint8_t *mac)
+{
+       struct proxy_sta_event iev;
+       
+       memset(&iev, 0, sizeof(iev));
+       memcpy(iev.iev_addr, mac, ETHER_ADDR_LEN);
+       
+       rt_proxystamsg(ifp, op, &iev, sizeof(iev));
+}
+
+static void
+bridge_proxysta_discover(struct ifnet *ifp, const uint8_t *mac)
+{
+	bridge_proxysta_notify_macaddr(ifp, RTM_PROXYSTA_DISCOVERY, mac);
+}
+
+static void
+bridge_proxysta_idle_timeout(struct ifnet *ifp, const uint8_t *mac)
+{
+	bridge_proxysta_notify_macaddr(ifp, RTM_PROXYSTA_IDLE_TIMEOUT, mac);
+}
+#endif
+
+static int
+bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
+{
+       struct ifbreq *req = arg;
+       struct bridge_iflist *bif;
+       
+       bif = bridge_lookup_member(sc, req->ifbr_ifsname);
+       if (bif == NULL)
+               return (ENOENT);
+       
+       bif->bif_path_cost = req->ifbr_path_cost;
+       
+       if (ifnet_flags(sc->sc_if) & IFF_RUNNING)
+               bstp_initialization(sc);
+       
+       return (0);
+}
+
+/*
+ * bridge_ifdetach:
+ *
+ *     Detach an interface from a bridge.  Called when a member
+ *     interface is detaching.
+ */
+static void
+bridge_ifdetach(struct bridge_iflist *bif, struct ifnet *ifp)
+{
+       struct bridge_softc *sc = bif->bif_sc;
+       struct ifbreq breq;
+       
+       memset(&breq, 0, sizeof(breq));
+       snprintf(breq.ifbr_ifsname, sizeof(breq.ifbr_ifsname),  "%s%d",
+             ifnet_name(ifp), ifnet_unit(ifp));
+       
+       lck_mtx_lock(sc->sc_mtx);
+       
+       (void) bridge_ioctl_del(sc, &breq);
+       
+       lck_mtx_unlock(sc->sc_mtx);
+}
+
+/*
+ * bridge_init:
+ *
+ *     Initialize a bridge interface.
+ */
+static int
+bridge_init(struct ifnet *ifp)
+{
+       struct bridge_softc *sc = ifnet_softc(ifp);
+       struct timespec ts;
+       errno_t error;
+       
+       if (ifnet_flags(ifp) & IFF_RUNNING)
+               return (0);
+       
+       ts.tv_sec = bridge_rtable_prune_period;
+       ts.tv_nsec = 0;
+       bsd_timeout(bridge_timer, sc, &ts);
+       
+       error = ifnet_set_flags(ifp, IFF_RUNNING, IFF_RUNNING);
+       if (error == 0)
+               bstp_initialization(sc);
+       
+       return error;
+}
+
+/*
+ * bridge_stop:
+ *
+ *     Stop the bridge interface.
+ */
+static void
+bridge_stop(struct ifnet *ifp, __unused int disable)
+{
+       struct bridge_softc *sc = ifnet_softc(ifp);
+       
+       if ((ifnet_flags(ifp) & IFF_RUNNING) == 0)
+               return;
+       
+       bsd_untimeout(bridge_timer, sc);
+       bstp_stop(sc);
+               
+       bridge_rtflush(sc, IFBF_FLUSHDYN);
+       
+       (void) ifnet_set_flags(ifp, 0, IFF_RUNNING);
+}
+
+/*
+ * bridge_enqueue:
+ *
+ *     Enqueue a packet on a bridge member interface.
+ *
+ *	Note: this is called on both the input and output paths, so this
+ *	routine cannot simply muck with the HW checksum flag.  For the
+ *	time being we rely on the caller to do the right thing.
+ */
+__private_extern__ void
+bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m)
+{
+       int len, error;
+
+       lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_OWNED);
+
+#if BRIDGE_DEBUG       
+       if (_if_brige_debug)
+               printf("bridge_enqueue sc %s%d to dst_ifp %s%d m %p\n", 
+                       ifnet_name(sc->sc_if), ifnet_unit(sc->sc_if), 
+                       ifnet_name(dst_ifp), ifnet_unit(dst_ifp), m);
+#endif /* BRIDGE_DEBUG */
+        
+       len = m->m_pkthdr.len;
+	m->m_flags |= M_PROTO1;	// set to avoid loops: bridge_iff_input skips M_PROTO1 frames
+       
+       error = ifnet_output_raw(dst_ifp, 0, m);
+       if (error == 0) {
+               (void) ifnet_stat_increment_out(sc->sc_if, 1, len, 0);
+       } else {
+               (void) ifnet_stat_increment_out(sc->sc_if, 0, 0, 1);
+       }
+       
+       return;
+}
+
+
+#if BRIDGE_MEMBER_OUT_FILTER
+
+/*
+ * bridge_output:
+ *
+ *     Send output from a bridge member interface.  This
+ *     performs the bridging function for locally originated
+ *     packets.
+ *
+ *     The mbuf has the Ethernet header already attached.  We must
+ *     enqueue or free the mbuf before returning.
+ */
+static int
+bridge_output(struct bridge_softc *sc, ifnet_t ifp, mbuf_t m)
+{
+       struct ether_header *eh;
+       struct ifnet *dst_if;
+       
+#if BRIDGE_DEBUG
+       if (_if_brige_debug)
+               printf("bridge_output ifp %p %s%d\n", ifp, ifnet_name(ifp), ifnet_unit(ifp));
+#endif /* BRIDGE_DEBUG */
+       
+       if (m->m_len < ETHER_HDR_LEN) {
+               m = m_pullup(m, ETHER_HDR_LEN);
+               if (m == NULL) {
+                       printf("bridge_output ifp %p m_pullup failed\n", ifp);
+                       return EJUSTRETURN;
+               }
+       }
+       
+       eh = mtod(m, struct ether_header *);
+
+       /* APPLE MODIFICATION <jhw@apple.com>
+        * If the packet is an 802.1X ethertype, then only send on the
+        * original output interface.
+        */
+       if (eh->ether_type == htons(ETHERTYPE_PAE)) {
+               dst_if = ifp;
+               goto sendunicast;
+       }
+       
+       /*
+        * If bridge is down, but the original output interface is up,
+        * go ahead and send out that interface.  Otherwise, the packet
+        * is dropped below.
+        */
+       if ((ifnet_flags(sc->sc_if) & IFF_RUNNING) == 0) {
+               dst_if = ifp;
+               goto sendunicast;
+       }
+       
+       lck_mtx_lock(sc->sc_mtx);
+       
+       /*
+        * If the packet is a multicast, or we don't know a better way to
+        * get there, send to all interfaces.
+        */
+       if (ETHER_IS_MULTICAST(eh->ether_dhost))
+               dst_if = NULL;
+       else
+               dst_if = bridge_rtlookup(sc, eh->ether_dhost);
+       if (dst_if == NULL) {
+               struct bridge_iflist *bif;
+               struct mbuf *mc;
+               int used = 0;
+               
+               LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+                       dst_if = bif->bif_ifp;
+                       if ((ifnet_flags(dst_if) & IFF_RUNNING) == 0)
+                               continue;
+                       
+                       /*
+                        * If this is not the original output interface,
+                        * and the interface is participating in spanning
+                        * tree, make sure the port is in a state that
+                        * allows forwarding.
+                        */
+                       if (dst_if != ifp &&
+                               (bif->bif_flags & IFBIF_STP) != 0) {
+                               switch (bif->bif_state) {
+                                       case BSTP_IFSTATE_BLOCKING:
+                                       case BSTP_IFSTATE_LISTENING:
+                                       case BSTP_IFSTATE_DISABLED:
+                                               continue;
+                               }
+                       }
+                       
+                       if (LIST_NEXT(bif, bif_next) == NULL) {
+                               used = 1;
+                               mc = m;
+                       } else {
+                               mc = m_copym(m, 0, M_COPYALL, M_NOWAIT);
+                               if (mc == NULL) {
+                                       printf("bridge_output ifp %p m_copym failed\n", ifp);
+                                       (void) ifnet_stat_increment_out(sc->sc_if, 0, 0, 1);
+                                       continue;
+                               }
+                       }
+                       
+                       bridge_enqueue(sc, dst_if, mc);
+               }
+               if (used == 0) {
+                       printf("bridge_output ifp %p not used\n", ifp);
+                       m_freem(m);
+               }
+               lck_mtx_unlock(sc->sc_mtx);
+               
+               return EJUSTRETURN;
+       }
+       
+sendunicast:
+       /*
+        * XXX Spanning tree consideration here?
+        */
+       
+       if ((ifnet_flags(dst_if) & IFF_RUNNING) == 0) {
+               printf("bridge_output ifp %p dst_if %p not running\n", ifp, dst_if);
+               m_freem(m);
+                               
+               return EJUSTRETURN;
+       }
+       
+       if (dst_if != ifp) {
+               lck_mtx_lock(sc->sc_mtx);
+
+               bridge_enqueue(sc, dst_if, m);
+       
+               lck_mtx_unlock(sc->sc_mtx);
+
+               return EJUSTRETURN;
+       }
+               
+       return (0);
+}
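+
+/*
+ * Note on the return convention above: EJUSTRETURN tells the caller that
+ * the mbuf has already been consumed (enqueued or freed) and must not be
+ * touched again, while 0 hands the packet back for normal output on the
+ * original member interface.
+ */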
+#endif /* BRIDGE_MEMBER_OUT_FILTER */
+
+#if APPLE_BRIDGE_HWCKSUM_SUPPORT
+static struct mbuf* bridge_fix_txcsum( struct mbuf *m )
+{
+       //      Basic tests indicate that the vast majority of packets processed
+       //      here have an Ethernet header mbuf prepended to them (the first case
+       //      below); the next most common are those where the Ethernet and
+       //      IP/TCP/UDP headers are all in one mbuf (the second case below).
+       //      The third case has never been hit in testing, but commenting out
+       //      the first two cases shows that its code handles them as well, so
+       //      it is considered a decent general solution.
+       
+       int amt = ETHER_HDR_LEN;
+       int hlen = M_CSUM_DATA_IPv4_IPHL( m->m_pkthdr.csum_data );
+       int off = M_CSUM_DATA_IPv4_OFFSET( m->m_pkthdr.csum_data );
+       
+       /* 
+        * NOTE: we should never get VLAN-attached packets here;
+        * support for those COULD be added, but we don't use them
+        * and worrying about them would only slow this path down.
+        */
+       
+#ifdef DIAGNOSTIC
+       if ( m_tag_find( m, PACKET_TAG_VLAN, NULL ) != NULL )
+       {
+               printf( "bridge: transmitting packet tagged with VLAN?\n" );
+               KASSERT( 0 );
+               m_freem( m );
+               return NULL;
+       }
+#endif
+       
+       if ( m->m_pkthdr.csum_flags & M_CSUM_IPv4 )
+       {
+               amt += hlen;
+       }
+       if ( m->m_pkthdr.csum_flags & M_CSUM_TCPv4 )
+       {
+               amt += off + sizeof( uint16_t );
+       }
+       
+       if ( m->m_pkthdr.csum_flags & M_CSUM_UDPv4 )
+       {
+               amt += off + sizeof( uint16_t );
+       }
+       
+       if ( m->m_len == ETHER_HDR_LEN )
+       {
+               // this is the case where there's an Ethernet header in an mbuf
+        
+               // the first mbuf is the Ethernet header -- just strip it off and do the checksum
+               struct mbuf *m_ip = m->m_next;
+        
+               // set up m_ip so the cksum operations work
+               /* APPLE MODIFICATION 22 Apr 2008 <mvega@apple.com>
+                *  <rdar://5817385> Clear the m_tag list before setting
+                *  M_PKTHDR.
+                *
+                *  If this m_buf chain was extended via M_PREPEND(), then
+                *  m_ip->m_pkthdr is identical to m->m_pkthdr (see
+                *  M_MOVE_PKTHDR()). The only thing preventing access to this
+                *  invalid packet header data is the fact that the M_PKTHDR
+                *  flag is clear, i.e., m_ip->m_flag & M_PKTHDR == 0, but we're
+                *  about to set the M_PKTHDR flag, so to be safe we initialize,
+                *  more accurately, we clear, m_ip->m_pkthdr.tags via
+                *  m_tag_init().
+                *
+                *  Suppose that we do not do this; if m_pullup(), below, fails,
+                *  then m_ip will be freed along with m_ip->m_pkthdr.tags, but
+                *  we will also free m soon after, via m_freem(), and
+                *  consequently attempt to free m->m_pkthdr.tags in the
+                *  process. The problem is that m->m_pkthdr.tags will have
+                *  already been freed by virtue of being equal to
+                *  m_ip->m_pkthdr.tags. Attempts to dereference
+                *  m->m_pkthdr.tags in m_tag_delete_chain() will result in a
+                *  panic.
+                */
+               m_tag_init(m_ip);
+               /* END MODIFICATION */
+               m_ip->m_flags |= M_PKTHDR;
+               m_ip->m_pkthdr.csum_flags = m->m_pkthdr.csum_flags;
+               m_ip->m_pkthdr.csum_data = m->m_pkthdr.csum_data;
+               m_ip->m_pkthdr.len = m->m_pkthdr.len - ETHER_HDR_LEN;
+        
+               // set up the header mbuf so we can prepend it back on again later
+               m->m_pkthdr.csum_flags = 0;
+               m->m_pkthdr.csum_data = 0;
+               m->m_pkthdr.len = ETHER_HDR_LEN;
+               m->m_next = NULL;
+        
+        
+               // now do the checksums we need -- first IP
+               if ( m_ip->m_pkthdr.csum_flags & M_CSUM_IPv4 )
+               {
+                       // make sure the IP header (or at least the part with the cksum) is there
+                       m_ip = m_pullup( m_ip, sizeof( struct ip ) );
+                       if ( m_ip == NULL )
+                       {
+                               printf( "bridge: failed to flatten header\n ");
+                               m_freem( m );
+                               return NULL;
+                       }
+                       
+                       // now do the checksum
+                       {
+                               struct ip *ip = mtod( m_ip, struct ip* );
+                               ip->ip_sum = in_cksum( m_ip, hlen );
+                
+#ifdef VERY_VERY_VERY_DIAGNOSTIC
+                               printf( "bridge: performed IPv4 checksum\n" );
+#endif
+                       }
+               }
+        
+               // now do a TCP or UDP delayed checksum
+               if ( m_ip->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4) )
+               {
+                       in_delayed_cksum( m_ip );
+            
+#ifdef VERY_VERY_VERY_DIAGNOSTIC
+                       printf( "bridge: performed TCPv4/UDPv4 checksum\n" );
+#endif
+               }
+        
+               // now attach the ethernet header back onto the IP packet
+               m->m_next = m_ip;
+               m->m_pkthdr.len += m_length( m_ip );    
+        
+               // clear the M_PKTHDR flags on the ip packet (again, we re-attach later)
+               m_ip->m_flags &= ~M_PKTHDR;
+        
+               // and clear any csum flags
+               m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_IPv4);
+       }
+       else if ( m->m_len >= amt )
+       {
+               // everything fits in the first mbuf, so futz with m->m_data, m->m_len and m->m_pkthdr.len to
+               // make it work
+               m->m_len -= ETHER_HDR_LEN;
+               m->m_data += ETHER_HDR_LEN;
+               m->m_pkthdr.len -= ETHER_HDR_LEN;
+        
+               // now do the checksums we need -- first IP
+               if ( m->m_pkthdr.csum_flags & M_CSUM_IPv4 )
+               {
+                       struct ip *ip = mtod( m, struct ip* );
+                       ip->ip_sum = in_cksum( m, hlen );
+            
+#ifdef VERY_VERY_VERY_DIAGNOSTIC
+                       printf( "bridge: performed IPv4 checksum\n" );
+#endif
+               }
+        
+               // now do a TCP or UDP delayed checksum
+               if ( m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4) )
+               {
+                       in_delayed_cksum( m );
+            
+#ifdef VERY_VERY_VERY_DIAGNOSTIC
+                       printf( "bridge: performed TCPv4/UDPv4 checksum\n" );
+#endif
+               }
+               
+               // now stick the ethernet header back on
+               m->m_len += ETHER_HDR_LEN;
+               m->m_data -= ETHER_HDR_LEN;
+               m->m_pkthdr.len += ETHER_HDR_LEN;
+        
+               // and clear any csum flags
+               m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_IPv4);
+       }
+       else
+       {
+               struct mbuf *m_ip;
+        
+               // general case -- need to simply split it off and deal
+        
+               // first, calculate how much needs to be made writable (we may have a read-only mbuf here)
+               hlen = M_CSUM_DATA_IPv4_IPHL( m->m_pkthdr.csum_data );
+#if PARANOID
+               off = M_CSUM_DATA_IPv4_OFFSET( m->m_pkthdr.csum_data );
+               
+               if ( m->m_pkthdr.csum_flags & M_CSUM_IPv4 )
+               {
+                       amt += hlen;
+               }
+               
+               if ( m->m_pkthdr.csum_flags & M_CSUM_TCPv4 )
+               {
+                       amt += sizeof( struct tcphdr );
+                       amt += off;
+               }
+               
+               if ( m->m_pkthdr.csum_flags & M_CSUM_UDPv4 )
+               {
+                       amt += sizeof( struct udphdr );
+                       amt += off;
+               }
+#endif
+        
+               // now split the ethernet header off of the IP packet (we'll re-attach later)
+               m_ip = m_split( m, ETHER_HDR_LEN, M_NOWAIT );
+               if ( m_ip == NULL )
+               {
+                       printf( "bridge_fix_txcsum: could not split ether header\n" );
+            
+                       m_freem( m );
+                       return NULL;
+               }
+        
+#if PARANOID
+               // make sure that the IP packet is writable for the portion we need
+               if ( m_makewritable( &m_ip, 0, amt, M_DONTWAIT ) != 0 )
+               {
+                       printf( "bridge_fix_txcsum: could not make %d bytes writable\n", amt );
+            
+                       m_freem( m );
+                       m_freem( m_ip );
+                       return NULL;
+               }
+#endif
+               
+               m_ip->m_pkthdr.csum_flags = m->m_pkthdr.csum_flags;
+               m_ip->m_pkthdr.csum_data = m->m_pkthdr.csum_data;
+        
+               m->m_pkthdr.csum_flags = 0;
+               m->m_pkthdr.csum_data = 0;
+        
+               // now do the checksums we need -- first IP
+               if ( m_ip->m_pkthdr.csum_flags & M_CSUM_IPv4 )
+               {
+                       // make sure the IP header (or at least the part with the cksum) is there
+                       m_ip = m_pullup( m_ip, sizeof( struct ip ) );
+                       if ( m_ip == NULL )
+                       {
+                               printf( "bridge: failed to flatten header\n ");
+                               m_freem( m );
+                               return NULL;
+                       }
+                       
+                       // now do the checksum
+                       {
+                               struct ip *ip = mtod( m_ip, struct ip* );
+                               ip->ip_sum = in_cksum( m_ip, hlen );
+                
+#ifdef VERY_VERY_VERY_DIAGNOSTIC
+                               printf( "bridge: performed IPv4 checksum\n" );
+#endif
+                       }
+               }
+        
+               // now do a TCP or UDP delayed checksum
+               if ( m_ip->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4) )
+               {
+                       in_delayed_cksum( m_ip );
+            
+#ifdef VERY_VERY_VERY_DIAGNOSTIC
+                       printf( "bridge: performed TCPv4/UDPv4 checksum\n" );
+#endif
+               }
+        
+               // now attach the ethernet header back onto the IP packet
+               m->m_next = m_ip;
+               m->m_pkthdr.len += m_length( m_ip );    
+        
+               // clear the M_PKTHDR flags on the ip packet (again, we re-attach later)
+               m_ip->m_flags &= ~M_PKTHDR;
+        
+               // and clear any csum flags
+               m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_IPv4);
+       }
+       
+       return m;
+}
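+
+/*
+ * Note: in all three cases above the checksums are computed over the IP
+ * portion with the Ethernet header detached, and the header is then
+ * re-attached, so the caller always gets back either a chain that still
+ * starts with the Ethernet header or NULL if the packet had to be dropped.
+ */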
+#endif
+
+/*
+ * bridge_start:
+ *
+ *     Start output on a bridge.
+ */
+static errno_t
+bridge_start(ifnet_t ifp, mbuf_t m)
+{
+       struct bridge_softc *sc = ifnet_softc(ifp);
+       struct ether_header *eh;
+       struct ifnet *dst_if;
+       
+       lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_NOTOWNED);
+
+       eh = mtod(m, struct ether_header *);
+       
+       if ((m->m_flags & (M_BCAST|M_MCAST)) == 0 &&
+               (dst_if = bridge_rtlookup(sc, eh->ether_dhost)) != NULL) {
+               
+               {
+#if APPLE_BRIDGE_HWCKSUM_SUPPORT
+                       /* 
+                        * APPLE MODIFICATION - if the packet needs a checksum (i.e., 
+                        * checksum has been deferred for HW support) AND the destination
+                        * interface doesn't support HW checksums, then we 
+                        * need to fix-up the checksum here
+                        */
+                       if (
+                               ( (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_IPv4) ) != 0 ) &&
+                               ( (dst_if->if_csum_flags_tx & m->m_pkthdr.csum_flags ) != m->m_pkthdr.csum_flags )
+                               )
+                       {
+                               m = bridge_fix_txcsum( m );
+                               if ( m == NULL )
+                               {
+                                       goto done;
+                               }
+                       }
+                       
+#else
+                       if (eh->ether_type == htons(ETHERTYPE_IP))
+                               mbuf_outbound_finalize(m, PF_INET, sizeof(struct ether_header));
+                       else
+                               m->m_pkthdr.csum_flags = 0;
+#endif
+                       lck_mtx_lock(sc->sc_mtx);
+                       #if NBPFILTER > 0
+                               if (sc->sc_bpf_output)
+                                       bridge_bpf_output(ifp, m);
+                       #endif
+                       bridge_enqueue(sc, dst_if, m);
+                       lck_mtx_unlock(sc->sc_mtx);
+               }
+       } else
+       {
+#if APPLE_BRIDGE_HWCKSUM_SUPPORT
+               
+               /* 
+                * APPLE MODIFICATION - if the MULTICAST packet needs a checksum (i.e., 
+                * checksum has been deferred for HW support) AND at least one destination
+                * interface doesn't support HW checksums, then we go ahead and fix it up
+                * here, since it doesn't make sense to do it more than once
+                */
+               
+               if (
+                       (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_IPv4)) &&
+                       /*
+                        * XXX FIX ME: keep track of whether or not we have any interfaces that 
+                        * do not support checksums (for now, assume we do)
+                        */
+                       ( 1 )
+                       )
+               {
+                       m = bridge_fix_txcsum( m );
+                       if ( m == NULL )
+                       {
+                               goto done;
+                       }
+               }
+#else
+               if (eh->ether_type == htons(ETHERTYPE_IP))
+                       mbuf_outbound_finalize(m, PF_INET, sizeof(struct ether_header));
+               else
+                       m->m_pkthdr.csum_flags = 0;
+#endif
+               
+               lck_mtx_lock(sc->sc_mtx);
+               #if NBPFILTER > 0
+                       if (sc->sc_bpf_output)
+                               bridge_bpf_output(ifp, m);
+               #endif
+               bridge_broadcast(sc, ifp, m, 0);
+               lck_mtx_unlock(sc->sc_mtx);
+       }
+#if APPLE_BRIDGE_HWCKSUM_SUPPORT
+done:
+#endif
+
+       return 0;
+}
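+
+/*
+ * Note: in the non-HWCKSUM build above, mbuf_outbound_finalize() computes
+ * in software any checksums that were deferred for hardware offload,
+ * since the packet may be delivered to a member interface with different
+ * offload capabilities.
+ */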
+
+/*
+ * bridge_forward:
+ *
+ *     The forwarding function of the bridge.
+ */
+static void
+bridge_forward(struct bridge_softc *sc, struct mbuf *m)
+{
+       struct bridge_iflist *bif;
+       struct ifnet *src_if, *dst_if;
+       struct ether_header *eh;
+
+       lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_OWNED);
+
+#if BRIDGE_DEBUG
+       if (_if_brige_debug)
+        printf("bridge_forward %s%d m%p\n", ifnet_name(sc->sc_if), ifnet_unit(sc->sc_if), m);
+#endif /* BRIDGE_DEBUG */
+       
+       src_if = m->m_pkthdr.rcvif;
+       
+       (void) ifnet_stat_increment_in(sc->sc_if, 1, m->m_pkthdr.len, 0);
+       
+       /*
+        * Look up the bridge_iflist.
+        */
+       bif = bridge_lookup_member_if(sc, src_if);
+       if (bif == NULL) {
+               /* Interface is not a bridge member (anymore?) */
+               m_freem(m);
+               return;
+       }
+       
+       /* APPLE MODIFICATION <cbz@apple.com> - add the ability to block forwarding of packets; for the guest network */
+#if ( APPLE_HAVE_80211_GUEST_NETWORK )
+       if (bif->bif_flags & IFBIF_NO_FORWARDING) {
+               /* Drop the packet and we're done. */
+               m_freem(m);
+               return;
+       }
+#endif
+       
+       if (bif->bif_flags & IFBIF_STP) {
+               switch (bif->bif_state) {
+            case BSTP_IFSTATE_BLOCKING:
+            case BSTP_IFSTATE_LISTENING:
+            case BSTP_IFSTATE_DISABLED:
+                m_freem(m);
+                return;
+               }
+       }
+       
+       eh = mtod(m, struct ether_header *);
+       
+       /*
+        * If the interface is learning, and the source
+        * address is valid and not multicast, record
+        * the address.
+        */
+       if ((bif->bif_flags & IFBIF_LEARNING) != 0 &&
+           ETHER_IS_MULTICAST(eh->ether_shost) == 0 &&
+           (eh->ether_shost[0] | eh->ether_shost[1] |
+            eh->ether_shost[2] | eh->ether_shost[3] |
+            eh->ether_shost[4] | eh->ether_shost[5]) != 0) {
+               (void) bridge_rtupdate(sc, eh->ether_shost,
+                               src_if, 0, IFBAF_DYNAMIC);
+       }
+       
+       if ((bif->bif_flags & IFBIF_STP) != 0 &&
+           bif->bif_state == BSTP_IFSTATE_LEARNING) {
+               m_freem(m);
+               return;
+       }
+       
+       /*
+        * At this point, the port either doesn't participate
+        * in spanning tree or it is in the forwarding state.
+        */
+       
+       /*
+        * If the packet is unicast, destined for someone on
+        * "this" side of the bridge, drop it.
+        */
+       if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) {
+        /* APPLE MODIFICATION <cbz@apple.com> - if the packet came in on a proxy STA
+         * discovery interface, we must not look up the node by the packet's DA;
+         * instead, look up the proxy STA that matches the SA.  If none has been
+         * found yet, drop the packet. */
+#if IEEE80211_PROXYSTA
+               if (bif->bif_flags & IFBIF_PROXYSTA_DISCOVER)
+               {
+                       struct bridge_rtnode *brt;
+                       dst_if = NULL;
+                       brt = bridge_rtnode_lookup(sc, eh->ether_shost);
+                       if (brt) {
+                               dst_if = brt->brt_ifp_proxysta;
+                       }
+                       if (dst_if == NULL) {
+                               m_freem(m);
+                               return;
+                       }
+               }
+               else
+#endif 
+            dst_if = bridge_rtlookup(sc, eh->ether_dhost);
+               if (src_if == dst_if) {
+                       m_freem(m);
+                       return;
+               }
+       } else {
+               /* ...forward it to all interfaces. */
+               sc->sc_if->if_imcasts++;
+               dst_if = NULL;
+       }
+       
+       /* APPLE MODIFICATION
+        * <rnewberry@apple.com> - this is now handled by bridge_input
+        * <cbz@apple.com>       - turning this back on because not all packets
+        *   are bpf_mtap'd equally.  RSN Preauth frames were not getting
+        *   through, so we condition this call on
+        *   (eh->ether_type == htons(ETHERTYPE_RSN_PREAUTH))
+        */
+#if 1
+       if (eh->ether_type == htons(ETHERTYPE_RSN_PREAUTH))
+       {
+        m->m_pkthdr.rcvif = sc->sc_if;
+#if NBPFILTER > 0
+        if (sc->sc_bpf_input)
+            bridge_bpf_input(sc->sc_if, m);
+#endif
+       }
+#endif
+        
+       if (dst_if == NULL) {
+        
+#if APPLE_BRIDGE_HWCKSUM_SUPPORT
+        /*
+         * Clear any in-bound checksum flags for this packet.
+         */
+        m->m_pkthdr.csum_flags = 0;
+#else
+               mbuf_inbound_modified(m);
+#endif
+        
+        bridge_broadcast(sc, src_if, m, 1);
+        return;
+       }
+       
+       /*
+        * At this point, we're dealing with a unicast frame
+        * going to a different interface.
+        */
+       if ((ifnet_flags(dst_if) & IFF_RUNNING) == 0) {
+               m_freem(m);
+               return;
+       }
+       bif = bridge_lookup_member_if(sc, dst_if);
+       if (bif == NULL) {
+               /* Not a member of the bridge (anymore?) */
+               m_freem(m);
+               return;
+       }
+       
+       if (bif->bif_flags & IFBIF_STP) {
+               switch (bif->bif_state) {
+            case BSTP_IFSTATE_DISABLED:
+            case BSTP_IFSTATE_BLOCKING:
+                m_freem(m);
+                return;
+               }
+       }
+        
+#if APPLE_BRIDGE_HWCKSUM_SUPPORT
+       /*
+        * Clear any in-bound checksum flags for this packet.
+        */
+       {
+               m->m_pkthdr.csum_flags = 0;
+       }
+#else
+       mbuf_inbound_modified(m);
+#endif
+       
+       bridge_enqueue(sc, dst_if, m);
+}
+
+char *ether_ntop(char *, size_t, const u_char *);
+
+__private_extern__ char *
+ether_ntop(char *buf, size_t len, const u_char *ap)
+{
+       snprintf(buf, len, "%02x:%02x:%02x:%02x:%02x:%02x", 
+                        ap[0], ap[1], ap[2], ap[3], ap[4], ap[5]);
+       
+       return buf;
+}
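+
+/*
+ * Usage sketch (hypothetical, for illustration only) -- format a MAC
+ * address into a stack buffer before logging it, as the debug paths in
+ * bridge_input() below do:
+ *
+ *	char addr[sizeof("XX:XX:XX:XX:XX:XX")+1];
+ *	printf("dst %s\n", ether_ntop(addr, sizeof(addr), eh->ether_dhost));
+ */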
+
+/*
+ * bridge_input:
+ *
+ *     Receive input from a member interface.  Queue the packet for
+ *     bridging if it is not for us.
+ */
+errno_t
+bridge_input(struct bridge_iflist *bif, struct ifnet *ifp, struct mbuf *m, void *frame_header)
+{
+       struct ifnet *bifp;
+       struct ether_header *eh;
+       struct mbuf *mc;
+       int is_for_us = 0;
+       struct bridge_softc *sc = bif->bif_sc;
+       struct bridge_iflist *brm;
+       
+#if BRIDGE_DEBUG
+       if (_if_brige_debug)
+               printf("bridge_input: %s%d from %s%d m %p data %p\n", 
+                          ifnet_name(sc->sc_if), ifnet_unit(sc->sc_if),
+                          ifnet_name(ifp), ifnet_unit(ifp), 
+                          m, mbuf_data(m));
+#endif /* BRIDGE_DEBUG */
+
+       if ((ifnet_flags(sc->sc_if) & IFF_RUNNING) == 0) {
+#if BRIDGE_DEBUG
+               if (_if_brige_debug)
+                       printf( "bridge_input: %s%d not running passing along\n",
+                                  ifnet_name(sc->sc_if), ifnet_unit(sc->sc_if));
+#endif /* BRIDGE_DEBUG */
+               return 0;
+       }
+       
+       /* Need to clear the promiscuous flag, otherwise the packet will be dropped by DLIL after processing filters */
+       if ((mbuf_flags(m) & MBUF_PROMISC))
+               mbuf_setflags_mask(m, 0, MBUF_PROMISC);
+       
+       lck_mtx_lock(sc->sc_mtx);
+       
+       bifp = sc->sc_if;
+       
+       /* TBD: is it a good idea to reassign a new value to bif here? */
+       bif = bridge_lookup_member_if(sc, ifp);
+       if (bif == NULL) {
+               lck_mtx_unlock(sc->sc_mtx);
+#if BRIDGE_DEBUG
+               if (_if_brige_debug)
+                       printf( "bridge_input: %s%d bridge_lookup_member_if failed\n",
+                                  ifnet_name(sc->sc_if), ifnet_unit(sc->sc_if));
+#endif /* BRIDGE_DEBUG */
+               return 0;
+       }
+       
+       eh = (struct ether_header *)mbuf_data(m);
+       
+       /*
+        * If the packet is for us, set the packets source as the
+        * bridge, and return the packet back to ether_input for
+        * local processing.
+        */
+       if (memcmp(eh->ether_dhost, ifnet_lladdr(bifp),
+                          ETHER_ADDR_LEN) == 0) {
+               
+               /* Mark the packet as arriving on the bridge interface */
+               (void) mbuf_pkthdr_setrcvif(m, bifp);
+               mbuf_pkthdr_setheader(m, frame_header);
+               
+               /*
+                * If the interface is learning, and the source
+                * address is valid and not multicast, record
+                * the address.
+                */
+               if ((bif->bif_flags & IFBIF_LEARNING) != 0 &&
+                       ETHER_IS_MULTICAST(eh->ether_shost) == 0 &&
+                       (eh->ether_shost[0] | eh->ether_shost[1] |
+                        eh->ether_shost[2] | eh->ether_shost[3] |
+                        eh->ether_shost[4] | eh->ether_shost[5]) != 0) {
+                               (void) bridge_rtupdate(sc, eh->ether_shost,
+                                                                          ifp, 0, IFBAF_DYNAMIC);
+                       }
+               
+#if NBPFILTER > 0
+               if (sc->sc_bpf_input)
+                       bridge_bpf_input(bifp, m);
+#endif
+               
+               (void) mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN, mbuf_len(m) - ETHER_HDR_LEN);
+               (void) mbuf_pkthdr_adjustlen(m, - ETHER_HDR_LEN);
+               
+               (void) ifnet_stat_increment_in(bifp, 1, mbuf_pkthdr_len(m), 0);
+
+               lck_mtx_unlock(sc->sc_mtx);
+                               
+#if BRIDGE_DEBUG
+               if (_if_brige_debug)
+                       printf( "bridge_input: %s%d packet for bridge\n",
+                                  ifnet_name(sc->sc_if), ifnet_unit(sc->sc_if));
+#endif /* BRIDGE_DEBUG */
+               
+               dlil_input_packet_list(bifp, m);
+               
+               return EJUSTRETURN;
+       }
+       
+       /*
+        * If the destination of the packet is the MAC address of the
+        * member interface itself, then we don't need to forward it --
+        * just pass it back.  Note that it will likely just be dropped
+        * by the stack, but if something else is bound to the interface
+        * directly (for example, the wireless stats protocol -- although
+        * that actually uses BPF right now), then it will consume the
+        * packet.
+        *
+        * ALSO note that we do this check AFTER checking for the
+        * bridge's own MAC address, because the bridge may be using
+        * the SAME MAC address as one of its interfaces.
+        */
+       if (memcmp(eh->ether_dhost, ifnet_lladdr(ifp),
+                          ETHER_ADDR_LEN) == 0) {
+               /* APPLE MODIFICATION <cbz@apple.com> - add support for Proxy STA */
+#if IEEE80211_PROXYSTA
+               if ((bif->bif_flags & IFBIF_PROXYSTA) == 0) {
+#endif
+                       
+#ifdef VERY_VERY_VERY_DIAGNOSTIC
+                       printf("bridge_input: not forwarding packet bound for member interface\n" );
+#endif
+                       lck_mtx_unlock(sc->sc_mtx);
+                       return 0;
+                       /* APPLE MODIFICATION <cbz@apple.com> - add support for Proxy STA */
+#if IEEE80211_PROXYSTA
+               }
+#if VERY_VERY_VERY_DIAGNOSTIC
+               else {
+                       printf( "%s: pkt rx on %s [proxysta iface], da is %02x:%02x:%02x:%02x:%02x:%02x\n",
+                                  __func__, ifp->if_xname, eh->ether_dhost[0], eh->ether_dhost[1], eh->ether_dhost[2], 
+                                  eh->ether_dhost[3], eh->ether_dhost[4], eh->ether_dhost[5] );
+               }
+#endif
+#endif
+       }
+       
+       if ((m->m_flags & (M_BCAST|M_MCAST))) {
+               struct ifmultiaddr *ifma = NULL;
+               
+               if ((m->m_flags & M_BCAST)) {
+                       is_for_us = 1;
+               } else {
+#if BRIDGE_DEBUG
+                       printf("mulicast: %02x:%02x:%02x:%02x:%02x:%02x\n",
+                                  eh->ether_dhost[0], eh->ether_dhost[1], eh->ether_dhost[2], 
+                                  eh->ether_dhost[3], eh->ether_dhost[4], eh->ether_dhost[5]);
+                       
+                       for (ifma = bifp->if_multiaddrs.lh_first; ifma;
+                                ifma = ifma->ifma_link.le_next) {
+                               
+                               if (ifma->ifma_addr == NULL)
+                                       printf("  <none> ");
+                               else if (ifma->ifma_addr->sa_family == AF_INET) {
+                                       struct sockaddr_in *sin = (struct sockaddr_in *)ifma->ifma_addr;
+                                       
+                                       printf("  %u.%u.%u.%u ",
+                                                  (sin->sin_addr.s_addr & 0xff000000) >> 24,
+                                                  (sin->sin_addr.s_addr & 0x00ff0000) >> 16,
+                                                  (sin->sin_addr.s_addr & 0x0000ff00) >> 8,
+                                                  (sin->sin_addr.s_addr & 0x000000ff));
+                               }
+                               if (!ifma->ifma_ll || !ifma->ifma_ll->ifma_addr)
+                                       printf("<none>\n");
+                               else {
+                                       struct sockaddr_dl *sdl = (struct sockaddr_dl *)ifma->ifma_ll->ifma_addr;
+                                       
+                                       printf("%02x:%02x:%02x:%02x:%02x:%02x\n",
+                                                  CONST_LLADDR(sdl)[0], CONST_LLADDR(sdl)[1], CONST_LLADDR(sdl)[2], 
+                                                  CONST_LLADDR(sdl)[3], CONST_LLADDR(sdl)[4], CONST_LLADDR(sdl)[5]);
+                                       
+                               }
+                       }
+#endif /* BRIDGE_DEBUG */
+                       
+                       /*
+                        * The upper layers of the stack have attached a list of
+                        * multicast addresses to the bridge itself (for example,
+                        * the IP stack has bound 01:00:5e:00:00:01 to the
+                        * 224.0.0.1 all-hosts address), since the IP stack is
+                        * bound to the bridge.  So we need to see if packets
+                        * arriving here SHOULD be passed up as coming from the
+                        * bridge.
+                        *
+                        * Furthermore, since we know the IP stack is attached to
+                        * the bridge, and NOTHING is attached to the underlying
+                        * devices themselves, we can drop packets that don't need
+                        * to go up (by returning EJUSTRETURN to the caller) after
+                        * we forward the packet to other interfaces.
+                        */
+                       
+                       for (ifma = bifp->if_multiaddrs.lh_first; ifma;
+                                ifma = ifma->ifma_link.le_next) {
+                               if (ifma->ifma_ll && ifma->ifma_ll->ifma_addr) {
+                                       struct sockaddr_dl *sdl = (struct sockaddr_dl *)ifma->ifma_ll->ifma_addr;
+                                       
+                                       if (memcmp(eh->ether_dhost, CONST_LLADDR(sdl), ETHER_ADDR_LEN) == 0)
+                                               break;
+                               }
+                       }
+                       if (ifma != NULL) {
+                               /* this packet matches the bridge's own filter, so pass it up as coming from us */
+                               
+                               /* Mark the packet as arriving on the bridge interface */
+                               // don't do this until AFTER we forward the packet -- bridge_forward uses this information
+                               //m->m_pkthdr.rcvif = bifp;
+                               
+                               /* keep track of this to help us decide about forwarding */
+                               is_for_us = 1;
+                               
+#if BRIDGE_DEBUG
+                               char    addr[sizeof("XX:XX:XX:XX:XX:XX")+1];
+                               printf( "bridge_input: multicast frame for us (%s)\n",
+                                          ether_ntop(addr, sizeof(addr), eh->ether_dhost) );
+#endif
+                       } else {
+#if BRIDGE_DEBUG
+                               char    addr[sizeof("XX:XX:XX:XX:XX:XX")+1];
+                               printf( "bridge_input: multicast frame for unbound address (%s), forwarding but not passing to stack\n",
+                                          ether_ntop(addr, sizeof(addr), eh->ether_dhost) );
+#endif
+                       }
+               }
+               /* Tap off 802.1D packets; they do not get forwarded. */
+               if (memcmp(eh->ether_dhost, bstp_etheraddr,
+                                  ETHER_ADDR_LEN) == 0) {
+                       m = bstp_input(sc, ifp, m);
+                       if (m == NULL) {
+                               lck_mtx_unlock(sc->sc_mtx);
+#if BRIDGE_DEBUG
+                               if (_if_brige_debug)
+                                       printf( "bridge_input: %s%d mcast BSTP not forwarded\n",
+                                                  ifnet_name(sc->sc_if), ifnet_unit(sc->sc_if));
+#endif /* BRIDGE_DEBUG */
+                               return EJUSTRETURN;
+                       }
+               }
+               
+               if (bif->bif_flags & IFBIF_STP) {
+                       switch (bif->bif_state) {
+                               case BSTP_IFSTATE_BLOCKING:
+                               case BSTP_IFSTATE_LISTENING:
+                               case BSTP_IFSTATE_DISABLED:
+                               {
+                                       lck_mtx_unlock(sc->sc_mtx);
+                                       
+#if BRIDGE_DEBUG
+                                       if (_if_brige_debug)
+                                               printf( "bridge_input: %s%d mcast bridge not learning or forwarding \n",
+                                                          ifnet_name(sc->sc_if), ifnet_unit(sc->sc_if));
+#endif /* BRIDGE_DEBUG */
+                                       
+                                       m_freem(m);
+                                       return EJUSTRETURN;
+                               }
+                       }
+               }
+               
+               /*
+                * If the interface is learning, and the source
+                * address is valid and not multicast, record
+                * the address.
+                */
+               if ((bif->bif_flags & IFBIF_LEARNING) != 0 &&
+                       ETHER_IS_MULTICAST(eh->ether_shost) == 0 &&
+                       (eh->ether_shost[0] | eh->ether_shost[1] |
+                        eh->ether_shost[2] | eh->ether_shost[3] |
+                        eh->ether_shost[4] | eh->ether_shost[5]) != 0) {
+                               (void) bridge_rtupdate(sc, eh->ether_shost,
+                                                                          ifp, 0, IFBAF_DYNAMIC);
+                       }
+               
+               if (is_for_us) {
+                       /*
+                        * Make a deep copy of the packet and enqueue the copy
+                        * for bridge processing; return the original packet for
+                        * local processing.
+                        */
+                       mc = m_dup(m, M_NOWAIT);
+                       if (mc == NULL) {
+#ifdef DIAGNOSTIC
+                               printf( "bridge_input: failed to duplicate multicast frame, not forwarding\n" );
+#endif
+#if BRIDGE_DEBUG
+                       } else {
+                               if (_if_brige_debug) {
+                                       printf_mbuf(mc, "mc for us: ", "\n");
+                                       printf_mbuf_data(m, 0, 20);
+                                       printf("\n");
+                               }
+#endif /* BRIDGE_DEBUG */
+                       }
+               } else {
+                       /*
+                        * we'll just pass the original, since we don't need to pass it
+                        * up the stack
+                        */
+                       mc = m;
+               }
+               
+               /* Perform the bridge forwarding function with the copy. */
+               if (mc != NULL) {
+#if BRIDGE_DEBUG
+                       if (_if_brige_debug)
+                               printf( "bridge_input: %s%d mcast forwarding \n",
+                                          ifnet_name(sc->sc_if), ifnet_unit(sc->sc_if));
+#endif /* BRIDGE_DEBUG */                      
+                       bridge_forward(sc, mc);
+               }
+               
+               // TBD should have an option for type of bridge
+#if 0
+               /*
+                * Reinject the mbuf as arriving on the bridge so we have a
+                * chance at claiming multicast packets.  We cannot loop back
+                * here from ether_input, as a bridge is never a member of a
+                * bridge.
+                */
+               if (bifp->if_bridge != NULL)
+                       panic("bridge_input: bridge %p in a bridge %p\n", bifp, bifp->if_bridge);
+               mc = m_dup(m, M_NOWAIT);
+               if (mc != NULL) {
+                       mc->m_pkthdr.rcvif = bifp;
+#if NBPFILTER > 0
+                       if (sc->sc_bpf_input)
+                               bridge_bpf_input(bifp, mc);
+#endif
+               }
+#endif        
+               /* Return the original packet for local processing. */
+               if ( !is_for_us )
+               {
+                       /* we don't free the packet -- bridge_forward already consumed it */
+                       lck_mtx_unlock(sc->sc_mtx);
+                       
+#if BRIDGE_DEBUG
+                       if (_if_brige_debug)
+                               printf( "bridge_input: %s%d mcast local processing\n",
+                                          ifnet_name(sc->sc_if), ifnet_unit(sc->sc_if));
+#endif
+                       
+                       return EJUSTRETURN;
+               }
+               
+               // mark packet as arriving on the bridge
+               m->m_pkthdr.rcvif = bifp;
+               m->m_pkthdr.header = mbuf_data(m);
+               
+#if NBPFILTER > 0
+               if (sc->sc_bpf_input)
+                       bridge_bpf_input(bifp, m);
+#endif
+               (void) mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN, mbuf_len(m) - ETHER_HDR_LEN);
+               (void) mbuf_pkthdr_adjustlen(m, - ETHER_HDR_LEN);
+               
+               (void) ifnet_stat_increment_in(bifp, 1, mbuf_pkthdr_len(m), 0);
+               
+               lck_mtx_unlock(sc->sc_mtx);
+               
+#if BRIDGE_DEBUG
+               if (_if_brige_debug)
+                       printf( "bridge_input: %s%d mcast for us\n",
+                                  ifnet_name(sc->sc_if), ifnet_unit(sc->sc_if));
+#endif /* BRIDGE_DEBUG */
+               
+               dlil_input_packet_list(bifp, m);
+               
+               return EJUSTRETURN;
+       }
+       
+       if (bif->bif_flags & IFBIF_STP) {
+               switch (bif->bif_state) {
+                       case BSTP_IFSTATE_BLOCKING:
+                       case BSTP_IFSTATE_LISTENING:
+                       case BSTP_IFSTATE_DISABLED:
+                               lck_mtx_unlock(sc->sc_mtx);
+                               
+#if BRIDGE_DEBUG
+                               if (_if_brige_debug)
+                                       printf( "bridge_input: %s%d ucast bridge not learning or forwarding \n",
+                                                  ifnet_name(sc->sc_if), ifnet_unit(sc->sc_if));
+#endif /* BRIDGE_DEBUG */
+                               
+                               m_freem(m);
+                               return EJUSTRETURN;
+               }
+       }
+       
+       /* this code is not needed for Apple's bridge where the stack attaches directly */
+#if 1 /* TBD should be an option */
+       /*
+        * Unicast.  Make sure it's not for us.
+        */
+       LIST_FOREACH(brm, &sc->sc_iflist, bif_next) {
+               if(ifnet_type(brm->bif_ifp) != IFT_ETHER)
+                       continue;
+               
+               /* It is destined for us. */
+               if (memcmp(ifnet_lladdr(brm->bif_ifp), eh->ether_dhost,
+                                  ETHER_ADDR_LEN) == 0) {
+                       if (brm->bif_flags & IFBIF_LEARNING)
+                               (void) bridge_rtupdate(sc,
+                                                                          eh->ether_shost, ifp, 0, IFBAF_DYNAMIC);
+                       m->m_pkthdr.rcvif = brm->bif_ifp;
+                       m->m_pkthdr.header = mbuf_data(m);
+                       
+                       (void) mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN, mbuf_len(m) - ETHER_HDR_LEN);
+                       (void) mbuf_pkthdr_adjustlen(m, - ETHER_HDR_LEN);
+#if BRIDGE_SUPPORT_GIF
+#if NGIF > 0
+                       if (ifnet_type(ifp) == IFT_GIF) {
+                               m->m_flags |= M_PROTO1;
+                               m->m_pkthdr.rcvif = brm->bif_ifp;
+                               (*brm->bif_ifp->if_input)(brm->bif_ifp, m);
+                               m = NULL;
+                       }
+#endif
+#endif
+                       lck_mtx_unlock(sc->sc_mtx);
+                       
+#if BRIDGE_DEBUG
+                       if (_if_brige_debug)
+                               printf( "bridge_input: %s%d ucast to member %s%d\n",
+                                          ifnet_name(sc->sc_if), ifnet_unit(sc->sc_if),
+                                          ifnet_name(brm->bif_ifp), ifnet_unit(brm->bif_ifp));
+#endif /* BRIDGE_DEBUG */
+                       
+                       dlil_input_packet_list(brm->bif_ifp, m);
+                       
+                       return EJUSTRETURN;
+               }
+               
+               /* We just received a packet that we sent out. */
+               if (memcmp(ifnet_lladdr(brm->bif_ifp), eh->ether_shost,
+                                  ETHER_ADDR_LEN) == 0) {
+                       lck_mtx_unlock(sc->sc_mtx);
+                       
+#if BRIDGE_DEBUG
+                       if (_if_brige_debug)
+                               printf( "bridge_input: %s%d ucast drop packet we sent out\n",
+                                          ifnet_name(sc->sc_if), ifnet_unit(sc->sc_if));
+#endif /* BRIDGE_DEBUG */
+                       
+                       m_freem(m);
+                       return EJUSTRETURN;
+               }
+       }
+#endif
+       
+       /*
+        * If the interface is learning, and the source
+        * address is valid and not multicast, record
+        * the address.
+        */
+       if ((bif->bif_flags & IFBIF_LEARNING) != 0 &&
+               ETHER_IS_MULTICAST(eh->ether_shost) == 0 &&
+               (eh->ether_shost[0] | eh->ether_shost[1] |
+                eh->ether_shost[2] | eh->ether_shost[3] |
+                eh->ether_shost[4] | eh->ether_shost[5]) != 0) {
+                       (void) bridge_rtupdate(sc, eh->ether_shost,
+                                                                  ifp, 0, IFBAF_DYNAMIC);
+               }
+       
+       /* Perform the bridge forwarding function. */
+#if BRIDGE_DEBUG
+       if (_if_brige_debug)
+               printf( "bridge_input: %s%d ucast forwarding\n",
+                          ifnet_name(sc->sc_if), ifnet_unit(sc->sc_if));
+#endif /* BRIDGE_DEBUG */
+       
+       bridge_forward(sc, m);
+       lck_mtx_unlock(sc->sc_mtx);
+       return EJUSTRETURN;
+}
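+
+/*
+ * Note on bridge_input()'s return convention: 0 passes the frame back to
+ * the caller for normal input processing on the member interface, while
+ * EJUSTRETURN signals that the bridge has consumed the mbuf (freed it or
+ * re-injected it via dlil_input_packet_list()).
+ */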
+
+/*
+ * bridge_broadcast:
+ *
+ *     Send a frame to all interfaces that are members of
+ *     the bridge, except for the one on which the packet
+ *     arrived.
+ */
+static void
+bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
+                 struct mbuf *m, __unused int runfilt)
+{
+       struct bridge_iflist *bif;
+       struct mbuf *mc;
+       struct ifnet *dst_if;
+       int used = 0;
+       
+       lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_OWNED);
+       
+       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+               dst_if = bif->bif_ifp;
+               if (dst_if == src_if)
+                       continue;
+        
+               if (bif->bif_flags & IFBIF_STP) {
+                       switch (bif->bif_state) {
+                case BSTP_IFSTATE_BLOCKING:
+                case BSTP_IFSTATE_DISABLED:
+                    continue;
+                       }
+               }
+        
+               if ((bif->bif_flags & IFBIF_DISCOVER) == 0 &&
+                   (m->m_flags & (M_BCAST|M_MCAST)) == 0)
+                       continue;
+        
+               if ((ifnet_flags(dst_if) & IFF_RUNNING) == 0)
+                       continue;
+        
+               if (LIST_NEXT(bif, bif_next) == NULL) {
+                       mc = m;
+                       used = 1;
+               } else {
+                       mc = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
+                       if (mc == NULL) {
+                               (void) ifnet_stat_increment_out(sc->sc_if, 0, 0, 1);
+                               continue;
+                       }
+               }
+        
+               bridge_enqueue(sc, dst_if, mc);
+       }
+       if (used == 0)
+               m_freem(m);
+}
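+
+/*
+ * Note on the "used" flag above: the last eligible member consumes the
+ * original mbuf instead of a copy, saving one m_copym() per broadcast;
+ * if no member was eligible, the original is freed.
+ */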
+
+/*
+ * bridge_rtupdate:
+ *
+ *     Add a bridge routing entry.
+ */
+static int
+bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst,
+                struct ifnet *dst_if, int setflags, uint8_t flags)
+{
+       struct bridge_rtnode *brt;
+       int error;
+       /* APPLE MODIFICATION <cbz@apple.com> - add support for Proxy STA */
+#if IEEE80211_PROXYSTA
+       struct bridge_iflist *bif;
+       int is_pds; /* are we a proxy sta discovery interface? */
+#endif
+       struct timespec now;
+       
+       /* APPLE MODIFICATION <cbz@apple.com> - add support for Proxy STA - is this an interface 
+     we want to do proxy sta discovery on? */
+#if IEEE80211_PROXYSTA
+       bif = bridge_lookup_member_if(sc, dst_if);
+       if ((bif) && (bif->bif_flags & IFBIF_PROXYSTA_DISCOVER)) {
+               is_pds = 1;
+       }
+       else {
+               is_pds = 0;
+       }
+#endif         
+       /*
+        * Stamp the current uptime once up front; it is needed on both
+        * the create path and the update path below.
+        */
+       nanouptime(&now);
+       
+       /*
+        * A route for this destination might already exist.  If so,
+        * update it, otherwise create a new one.
+        */
+       if ((brt = bridge_rtnode_lookup(sc, dst)) == NULL) {
+        /* APPLE MODIFICATION <cbz@apple.com> - add support for Proxy STA */
+#if IEEE80211_PROXYSTA
+               /* If we are a proxy STA discovery interface, don't count this
+                * address against the normal bridge cache limit; allow proxy
+                * STAs to double that number, so there is still *some* bound
+                * on it. */
+               if (is_pds) {
+                       if (sc->sc_brtcnt >= (sc->sc_brtmax+sc->sc_brtmax_proxysta))
+                               return (ENOSPC);
+               }
+               else
+#endif         
+            if (sc->sc_brtcnt >= sc->sc_brtmax)
+                return (ENOSPC);
+        
+               /*
+                * Allocate a new bridge forwarding node, and
+                * initialize the expiration time and Ethernet
+                * address.
+                */
+               brt = zalloc_noblock(bridge_rtnode_pool);
+               if (brt == NULL)
+                       return (ENOMEM);
+        
+               memset(brt, 0, sizeof(*brt));
+               brt->brt_expire = now.tv_sec + sc->sc_brttimeout;
+               brt->brt_flags = IFBAF_DYNAMIC;
+               memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
+        
+        /* APPLE MODIFICATION <cbz@apple.com> - add support for Proxy STA - is this an interface 
+         we want to do proxy sta discovery on?  If so, post a monitoring event */
+#if IEEE80211_PROXYSTA
+               if (is_pds) {
+                       brt->brt_flags_ext |= IFBAF_EXT_PROXYSTA;
+#if DIAGNOSTIC
+                       printf( "%s: proxysta %02x:%02x:%02x:%02x:%02x:%02x on %s; discovery\n",
+                   __func__, dst[0], dst[1], dst[2], dst[3], dst[4], dst[5], dst_if->if_xname );
+#endif
+                       bridge_proxysta_discover( dst_if, dst );        
+               }       
+#endif
+        
+               if ((error = bridge_rtnode_insert(sc, brt)) != 0) {
+                       zfree(bridge_rtnode_pool, brt);
+                       return (error);
+               }
+       }
+       
+       brt->brt_ifp = dst_if;
+       if (setflags) {
+               brt->brt_flags = flags;
+               brt->brt_expire = (flags & IFBAF_STATIC) ? 0 :
+        now.tv_sec + sc->sc_brttimeout;
+       }
+       
+       /* APPLE MODIFICATION <cbz@apple.com> - add support for Proxy STA -  */
+#if IEEE80211_PROXYSTA
+       if (is_pds) {
+#if VERY_VERY_DIAGNOSTIC
+               printf( "%s: proxysta %02x:%02x:%02x:%02x:%02x:%02x on %s; reset timeout\n",
+               __func__, dst[0], dst[1], dst[2], dst[3], dst[4], dst[5], dst_if->if_xname );
+#endif
+               brt->brt_expire = (flags & IFBAF_STATIC) ? 0 :
+        now.tv_sec + sc->sc_brttimeout;
+       }       
+#endif
+       
+       return (0);
+}
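+
+/*
+ * Illustrative call (hypothetical caller, e.g. an ioctl path): pin a
+ * static entry that never ages out -- with setflags non-zero and
+ * IFBAF_STATIC set, brt_expire is forced to 0 above:
+ *
+ *	error = bridge_rtupdate(sc, lladdr, dst_if, 1, IFBAF_STATIC);
+ */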
+
+/*
+ * bridge_rtlookup:
+ *
+ *     Lookup the destination interface for an address.
+ */
+static struct ifnet *
+bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr)
+{
+       struct bridge_rtnode *brt;
+       
+       if ((brt = bridge_rtnode_lookup(sc, addr)) == NULL)
+               return (NULL);
+       
+       return (brt->brt_ifp);
+}
+
+/*
+ * bridge_rttrim:
+ *
+ *     Trim the routing table so that the number of routing
+ *     entries is less than or equal to the maximum.
+ */
+static void
+bridge_rttrim(struct bridge_softc *sc)
+{
+       struct bridge_rtnode *brt, *nbrt;
+       
+       /* Make sure we actually need to do this. */
+       if (sc->sc_brtcnt <= sc->sc_brtmax)
+               return;
+       
+       /* Force an aging cycle; this might trim enough addresses. */
+       bridge_rtage(sc);
+       if (sc->sc_brtcnt <= sc->sc_brtmax)
+               return;
+       
+       for (brt = LIST_FIRST(&sc->sc_rtlist); brt != NULL; brt = nbrt) {
+               nbrt = LIST_NEXT(brt, brt_list);
+               if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
+                       bridge_rtnode_destroy(sc, brt);
+                       if (sc->sc_brtcnt <= sc->sc_brtmax)
+                               return;
+               }
+       }
+}
+
+/*
+ * bridge_timer:
+ *
+ *     Aging timer for the bridge.
+ */
+static void
+bridge_timer(void *arg)
+{
+       struct bridge_softc *sc = arg;
+       struct timespec ts;
+       
+       lck_mtx_lock(sc->sc_mtx);
+       
+       bridge_rtage(sc);
+       
+       lck_mtx_unlock(sc->sc_mtx);
+       
+       if (ifnet_flags(sc->sc_if) & IFF_RUNNING) {
+               ts.tv_sec = bridge_rtable_prune_period;
+               ts.tv_nsec = 0;
+               bsd_timeout(bridge_timer, sc, &ts);
+       }
+}
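+
+/*
+ * Note: the timer re-arms itself every bridge_rtable_prune_period seconds
+ * for as long as the bridge is IFF_RUNNING, so a single initial
+ * bsd_timeout() keeps the aging cycle going; bridge_stop() cancels it
+ * with bsd_untimeout().
+ */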
+
+/*
+ * bridge_rtage:
+ *
+ *     Perform an aging cycle.
+ */
+static void
+bridge_rtage(struct bridge_softc *sc)
+{
+       struct bridge_rtnode *brt, *nbrt;
+       struct timespec now;
+       
+       nanouptime(&now);
+       
+       for (brt = LIST_FIRST(&sc->sc_rtlist); brt != NULL; brt = nbrt) {
+               nbrt = LIST_NEXT(brt, brt_list);
+               if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
+                       if ((unsigned long)now.tv_sec >= brt->brt_expire)
+                               bridge_rtnode_destroy(sc, brt);
+               }
+       }
+}
+
+/*
+ * bridge_rtflush:
+ *
+ *     Remove all dynamic addresses from the bridge.
+ */
+static void
+bridge_rtflush(struct bridge_softc *sc, int full)
+{
+       struct bridge_rtnode *brt, *nbrt;
+       
+       for (brt = LIST_FIRST(&sc->sc_rtlist); brt != NULL; brt = nbrt) {
+               nbrt = LIST_NEXT(brt, brt_list);
+               if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
+                       bridge_rtnode_destroy(sc, brt);
+       }
+}
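+
+/*
+ * Note: with full == 0 only IFBAF_DYNAMIC entries are removed above; any
+ * non-zero value removes static entries as well.
+ */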
+
+/* APPLE MODIFICATION <cbz@apple.com> - add support for Proxy STA */
+#if IEEE80211_PROXYSTA
+/*
+ * bridge_rtdiscovery:
+ *
+ */
+static void
+bridge_rtdiscovery(struct bridge_softc *sc)
+{
+       struct bridge_rtnode *brt, *nbrt;
+       struct bridge_iflist *bif;
+       
+       for (brt = LIST_FIRST(&sc->sc_rtlist); brt != NULL; brt = nbrt) {
+               nbrt = LIST_NEXT(brt, brt_list);
+               bif = bridge_lookup_member_if(sc, brt->brt_ifp);
+               if ((bif) && (bif->bif_flags & IFBIF_PROXYSTA_DISCOVER) && 
+                       ((brt->brt_flags_ext & IFBAF_EXT_PROXYSTA) == 0)) {
+#if DIAGNOSTIC
+                       printf( "%s: proxysta %02x:%02x:%02x:%02x:%02x:%02x on %s; found before IFBIF_PROXYSTA_DISCOVER\n",
+                                  __func__, brt->brt_addr[0], brt->brt_addr[1], brt->brt_addr[2], brt->brt_addr[3], 
+                                  brt->brt_addr[4], brt->brt_addr[5], brt->brt_ifp->if_xname );
+#endif
+                       brt->brt_flags_ext |= IFBAF_EXT_PROXYSTA;
+               }
+               
+               if (brt->brt_ifp_proxysta == NULL) {
+#if DIAGNOSTIC
+                       printf( "%s: proxysta %02x:%02x:%02x:%02x:%02x:%02x on %s; discovery\n",
+                                  __func__, brt->brt_addr[0], brt->brt_addr[1], brt->brt_addr[2], brt->brt_addr[3], 
+                                  brt->brt_addr[4], brt->brt_addr[5], brt->brt_ifp->if_xname );
+#endif
+                       bridge_proxysta_discover( brt->brt_ifp, brt->brt_addr );        
+               }
+       }
+}
+
+/*
+ * bridge_rtpurge:
+ *
+ *     Remove all dynamic addresses from a specific interface on the bridge.
+ */
+static void
+bridge_rtpurge(struct bridge_softc *sc, struct ifnet *ifs)
+{
+       struct bridge_rtnode *brt, *nbrt;
+       
+       for (brt = LIST_FIRST(&sc->sc_rtlist); brt != NULL; brt = nbrt) {
+               nbrt = LIST_NEXT(brt, brt_list);
+               if (brt->brt_ifp == ifs) {
+#if DIAGNOSTIC
+                       printf( "%s: purge %s [%02x:%02x:%02x:%02x:%02x:%02x] discovered on %s\n",
+                   __func__, brt->brt_ifp_proxysta ? brt->brt_ifp_proxysta->if_xname : brt->brt_ifp->if_xname, 
+                   brt->brt_addr[0], brt->brt_addr[1], brt->brt_addr[2], 
+                   brt->brt_addr[3], brt->brt_addr[4], brt->brt_addr[5], brt->brt_ifp->if_xname );
+#endif
+                       bridge_rtnode_destroy(sc, brt);
+               }
+       }
+}
+#endif
+
+/*
+ * bridge_rtdaddr:
+ *
+ *     Remove an address from the table.
+ */
+static int
+bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr)
+{
+       struct bridge_rtnode *brt;
+       
+       if ((brt = bridge_rtnode_lookup(sc, addr)) == NULL)
+               return (ENOENT);
+       
+       bridge_rtnode_destroy(sc, brt);
+       return (0);
+}
+
+/*
+ * bridge_rtdelete:
+ *
+ *     Delete routes to a specific member interface.
+ */
+__private_extern__ void
+bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full)
+{
+       struct bridge_rtnode *brt, *nbrt;
+       
+       for (brt = LIST_FIRST(&sc->sc_rtlist); brt != NULL; brt = nbrt) {
+               nbrt = LIST_NEXT(brt, brt_list);
+               if (brt->brt_ifp == ifp && (full ||
+                                    (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC))
+                       bridge_rtnode_destroy(sc, brt);
+       }
+}
+
+/*
+ * bridge_rtable_init:
+ *
+ *     Initialize the route table for this bridge.
+ */
+static int
+bridge_rtable_init(struct bridge_softc *sc)
+{
+       int i;
+       
+       sc->sc_rthash = _MALLOC(sizeof(*sc->sc_rthash) * BRIDGE_RTHASH_SIZE,
+                            M_DEVBUF, M_WAITOK);
+       if (sc->sc_rthash == NULL)
+               return (ENOMEM);
+       
+       for (i = 0; i < BRIDGE_RTHASH_SIZE; i++)
+               LIST_INIT(&sc->sc_rthash[i]);
+       
+       sc->sc_rthash_key = random();
+       
+       LIST_INIT(&sc->sc_rtlist);
+       
+       return (0);
+}
+
+/*
+ * bridge_rtable_fini:
+ *
+ *     Deconstruct the route table for this bridge.
+ */
+static void
+bridge_rtable_fini(struct bridge_softc *sc)
+{
+       
+       _FREE(sc->sc_rthash, M_DEVBUF);
+}
+
+/*
+ * The following hash function is adapted from "Hash Functions" by Bob Jenkins
+ * ("Algorithm Alley", Dr. Dobb's Journal, September 1997).
+ */
+#define        mix(a, b, c)                                                    \
+do {                                                                   \
+       a -= b; a -= c; a ^= (c >> 13);                                 \
+       b -= c; b -= a; b ^= (a << 8);                                  \
+       c -= a; c -= b; c ^= (b >> 13);                                 \
+       a -= b; a -= c; a ^= (c >> 12);                                 \
+       b -= c; b -= a; b ^= (a << 16);                                 \
+       c -= a; c -= b; c ^= (b >> 5);                                  \
+       a -= b; a -= c; a ^= (c >> 3);                                  \
+       b -= c; b -= a; b ^= (a << 10);                                 \
+       c -= a; c -= b; c ^= (b >> 15);                                 \
+} while (/*CONSTCOND*/0)
+
+static uint32_t
+bridge_rthash(__unused struct bridge_softc *sc, const uint8_t *addr)
+{
+       /* APPLE MODIFICATION - Wasabi performance improvement - simplify the hash algorithm */
+#if 0
+       uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
+       
+       b += addr[5] << 8;
+       b += addr[4];
+       a += addr[3] << 24;
+       a += addr[2] << 16;
+       a += addr[1] << 8;
+       a += addr[0];
+       
+       mix(a, b, c);
+       
+       return (c & BRIDGE_RTHASH_MASK);
+#else
+       return addr[5];
+#endif
+}
+
+#undef mix
+
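The Apple modification above replaces the Jenkins mix with a direct index on the last byte of the MAC address, and the returned value is not masked. That is only in range under the assumption that the hash table spans a full byte; a hypothetical compile-time guard (not in the source) states the assumption, with BRIDGE_RTHASH_SIZE defined elsewhere in this file:

    #if BRIDGE_RTHASH_SIZE < 256
    #error "simplified bridge_rthash needs >= 256 buckets (or mask with BRIDGE_RTHASH_MASK)"
    #endif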
+/*
+ * bridge_rtnode_lookup:
+ *
+ *     Look up a bridge route node for the specified destination.  The
+ *     hash chains are kept sorted by address, so the search can stop
+ *     early instead of walking the whole chain.
+ */
+static struct bridge_rtnode *
+bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr)
+{
+       struct bridge_rtnode *brt;
+       uint32_t hash;
+       int dir;
+       
+       hash = bridge_rthash(sc, addr);
+       LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) {
+               dir = memcmp(addr, brt->brt_addr, ETHER_ADDR_LEN);
+               if (dir == 0)
+                       return (brt);
+               if (dir > 0)
+                       return (NULL);
+       }
+       
+       return (NULL);
+}
+
+/*
+ * bridge_rtnode_insert:
+ *
+ *     Insert the specified bridge node into the route table.  We
+ *     assume the entry is not already in the table.
+ */
+static int
+bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
+{
+       struct bridge_rtnode *lbrt;
+       uint32_t hash;
+       int dir;
+       
+       hash = bridge_rthash(sc, brt->brt_addr);
+       
+       lbrt = LIST_FIRST(&sc->sc_rthash[hash]);
+       if (lbrt == NULL) {
+               LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash);
+               goto out;
+       }
+       
+       do {
+               dir = memcmp(brt->brt_addr, lbrt->brt_addr, ETHER_ADDR_LEN);
+               if (dir == 0)
+                       return (EEXIST);
+               if (dir > 0) {
+                       LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
+                       goto out;
+               }
+               if (LIST_NEXT(lbrt, brt_hash) == NULL) {
+                       LIST_INSERT_AFTER(lbrt, brt, brt_hash);
+                       goto out;
+               }
+               lbrt = LIST_NEXT(lbrt, brt_hash);
+       } while (lbrt != NULL);
+       
+#ifdef DIAGNOSTIC
+       panic("bridge_rtnode_insert: impossible");
+#endif
+       
+out:
+       LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list);
+       sc->sc_brtcnt++;
+       
+       return (0);
+}
+
+/*
+ * bridge_rtnode_destroy:
+ *
+ *     Destroy a bridge rtnode.
+ */
+static void
+bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
+{
+       lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_OWNED);
+       
+       /* APPLE MODIFICATION <cbz@apple.com> - add support for Proxy STA */
+#if IEEE80211_PROXYSTA
+       if (brt->brt_flags_ext & IFBAF_EXT_PROXYSTA) {
+#if DIAGNOSTIC
+               printf( "%s: proxysta %02x:%02x:%02x:%02x:%02x:%02x %s from %s; idle timeout\n",
+               __func__, brt->brt_addr[0], brt->brt_addr[1], brt->brt_addr[2], 
+               brt->brt_addr[3], brt->brt_addr[4], brt->brt_addr[5], 
+               brt->brt_ifp_proxysta ? brt->brt_ifp_proxysta->if_xname : "unknown",
+               brt->brt_ifp->if_xname );
+#endif
+               bridge_proxysta_idle_timeout( brt->brt_ifp, brt->brt_addr );    
+       }
+#endif
+       
+       LIST_REMOVE(brt, brt_hash);
+       
+       LIST_REMOVE(brt, brt_list);
+       sc->sc_brtcnt--;
+       zfree(bridge_rtnode_pool, brt);
+}
+
+static errno_t
+bridge_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func bpf_callback)
+{
+       struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
+       
+       //printf("bridge_set_bpf_tap ifp %p mode %d\n", ifp, mode);
+       
+       /* TBD locking */
+       if (sc == NULL || (sc->sc_flags & SCF_DETACHING)) {
+               return ENODEV;
+       }
+       
+       switch (mode) {
+               case BPF_TAP_DISABLE:
+                       sc->sc_bpf_input = sc->sc_bpf_output = NULL;
+                       break;
+                       
+               case BPF_TAP_INPUT:
+                       sc->sc_bpf_input = bpf_callback;
+                       break;
+                       
+               case BPF_TAP_OUTPUT:
+                       sc->sc_bpf_output = bpf_callback;
+                       break;
+                       
+               case BPF_TAP_INPUT_OUTPUT:
+                       sc->sc_bpf_input = sc->sc_bpf_output = bpf_callback;
+                       break;
+                       
+               default:
+                       break;
+       }
+       
+       return 0;
+}
+
+static void
+bridge_detach(__unused ifnet_t ifp)
+{
+       struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
+       
+       /* Tear down the routing table. */
+       bridge_rtable_fini(sc);
+       
+       lck_rw_lock_exclusive(bridge_list_lock);
+       LIST_REMOVE(sc, sc_list);
+       lck_rw_done(bridge_list_lock);
+       
+       ifnet_release(ifp);
+       
+       lck_mtx_free(sc->sc_mtx, bridge_lock_grp);
+       
+       _FREE(sc, M_DEVBUF);
+       return;
+}
+
+__private_extern__ errno_t bridge_bpf_input(ifnet_t ifp, struct mbuf *m)
+{
+       struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
+       
+       if (sc->sc_bpf_input) {
+               if (mbuf_pkthdr_rcvif(m) != ifp)
+                       printf("bridge_bpf_input rcvif: %p != ifp %p\n", mbuf_pkthdr_rcvif(m), ifp);
+               (*sc->sc_bpf_input)(ifp, m);
+       }
+       return 0;
+}
+
+__private_extern__ errno_t bridge_bpf_output(ifnet_t ifp, struct mbuf *m)
+{
+       struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
+       
+       if (sc->sc_bpf_output) {
+               (*sc->sc_bpf_output)(ifp, m);
+       }
+       return 0;
+}
+
diff --git a/bsd/net/if_bridgevar.h b/bsd/net/if_bridgevar.h
new file mode 100644
index 0000000..6b47c92
--- /dev/null
+++ b/bsd/net/if_bridgevar.h
@@ -0,0 +1,484 @@
+/*
+ * Copyright (c) 2004-2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*     $apfw: if_bridgevar,v 1.7 2008/10/24 02:34:06 cbzimmer Exp $ */
+/*     $NetBSD: if_bridgevar.h,v 1.8 2005/12/10 23:21:38 elad Exp $    */
+
+/*
+ * Copyright 2001 Wasabi Systems, Inc.
+ * All rights reserved.
+ *
+ * Written by Jason R. Thorpe for Wasabi Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed for the NetBSD Project by
+ *     Wasabi Systems, Inc.
+ * 4. The name of Wasabi Systems, Inc. may not be used to endorse
+ *    or promote products derived from this software without specific prior
+ *    written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by Jason L. Wright
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * OpenBSD: if_bridge.h,v 1.14 2001/03/22 03:48:29 jason Exp
+ */
+
+/*
+ * Data structure and control definitions for bridge interfaces.
+ */
+
+#ifndef _NET_IF_BRIDGEVAR_H_
+#define _NET_IF_BRIDGEVAR_H_
+
+#ifdef PRIVATE
+
+#include <sys/queue.h>
+
+#include <net/if.h>
+#include <net/ethernet.h>
+
+/*
+ * Commands used in the SIOCSDRVSPEC ioctl.  Note the lookup of the
+ * bridge interface itself is keyed off the ifdrv structure.
+ */
+#define        BRDGADD                 0       /* add bridge member (ifbreq) */
+#define        BRDGDEL                 1       /* delete bridge member (ifbreq) */
+#define        BRDGGIFFLGS             2       /* get member if flags (ifbreq) */
+#define        BRDGSIFFLGS             3       /* set member if flags (ifbreq) */
+#define        BRDGSCACHE              4       /* set cache size (ifbrparam) */
+#define        BRDGGCACHE              5       /* get cache size (ifbrparam) */
+#define        BRDGGIFS                6       /* get member list (ifbifconf) */
+#define        BRDGRTS                 7       /* get address list (ifbaconf) */
+#define        BRDGSADDR               8       /* set static address (ifbareq) */
+#define        BRDGSTO                 9       /* set cache timeout (ifbrparam) */
+#define        BRDGGTO                 10      /* get cache timeout (ifbrparam) */
+#define        BRDGDADDR               11      /* delete address (ifbareq) */
+#define        BRDGFLUSH               12      /* flush address cache (ifbreq) */
+
+#define        BRDGGPRI                13      /* get priority (ifbrparam) */
+#define        BRDGSPRI                14      /* set priority (ifbrparam) */
+#define        BRDGGHT                 15      /* get hello time (ifbrparam) */
+#define        BRDGSHT                 16      /* set hello time (ifbrparam) */
+#define        BRDGGFD                 17      /* get forward delay (ifbrparam) */
+#define        BRDGSFD                 18      /* set forward delay (ifbrparam) */
+#define        BRDGGMA                 19      /* get max age (ifbrparam) */
+#define        BRDGSMA                 20      /* set max age (ifbrparam) */
+#define        BRDGSIFPRIO             21      /* set if priority (ifbreq) */
+#define BRDGSIFCOST            22      /* set if path cost (ifbreq) */
+#define BRDGGFILT              23      /* get filter flags (ifbrparam) */
+#define BRDGSFILT              24      /* set filter flags (ifbrparam) */
+#define        BRDGPURGE               25      /* purge address cache for a particular interface (ifbreq) */
+
+/*
+ * Generic bridge control request.
+ */
+#pragma pack(4)
+
+struct ifbreq {
+       char            ifbr_ifsname[IFNAMSIZ]; /* member if name */
+       uint32_t        ifbr_ifsflags;          /* member if flags */
+        uint16_t        ifbr_portno;            /* member if port number */
+       uint8_t         ifbr_state;             /* member if STP state */
+       uint8_t         ifbr_priority;          /* member if STP priority */
+       uint8_t         ifbr_path_cost;         /* member if STP cost */
+};
+
+#pragma pack()
+
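A minimal userland sketch of driving the commands above; illustrative only, since this header is PRIVATE. struct ifdrv and SIOCSDRVSPEC come from the system headers, and the interface names are assumptions:

    #include <string.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <sys/sockio.h>
    #include <net/if.h>
    #include <net/if_bridgevar.h>

    /* Hypothetical helper: add member "en0" to "bridge0" via BRDGADD. */
    static int
    bridge_add_member(int s)
    {
            struct ifbreq req;
            struct ifdrv ifd;

            memset(&req, 0, sizeof (req));
            strlcpy(req.ifbr_ifsname, "en0", sizeof (req.ifbr_ifsname));

            memset(&ifd, 0, sizeof (ifd));
            strlcpy(ifd.ifd_name, "bridge0", sizeof (ifd.ifd_name));
            ifd.ifd_cmd = BRDGADD;          /* command from the list above */
            ifd.ifd_len = sizeof (req);     /* BRDGADD takes an ifbreq */
            ifd.ifd_data = &req;

            return (ioctl(s, SIOCSDRVSPEC, &ifd));
    }

Here s is assumed to be an ordinary AF_INET datagram socket.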
+/* BRDGGIFFLAGS, BRDGSIFFLAGS */
+#define        IFBIF_LEARNING          0x01    /* if can learn */
+#define        IFBIF_DISCOVER          0x02    /* if sends packets w/ unknown dest. */
+#define        IFBIF_STP               0x04    /* if participates in spanning tree */
+/* APPLE MODIFICATION <cbz@apple.com>
+ add the following bits for ProxySTA:
+ IFBIF_PROXYSTA, IFBIF_PROXYSTA_DISCOVER
+ add the following bits for Guest Network      
+ IFBIF_NO_FORWARDING
+ */
+#define        IFBIF_PROXYSTA          0x08    /* if interface is a proxy sta */
+#define        IFBIF_PROXYSTA_DISCOVER 0x10    /* if used to discover proxy sta candidates */
+#define        IFBIF_NO_FORWARDING     0x20    /* if may not forward traffic to other members */
+
+/* APPLE MODIFICATION <cbz@apple.com> 
+ add the following bits for ProxySTA:
+ PROXYSTA, PROXYSTA_DISCOVER
+ add the following bits for Guest Network      
+ NO_FORWARDING
+ this was...   
+ #define       IFBIFBITS       "\020\1LEARNING\2DISCOVER\3STP"
+ */
+#define        IFBIFBITS       "\020\1LEARNING\2DISCOVER\3STP\4PROXYSTA\5PROXYSTA_DISCOVER\6NO_FORWARDING"
+
+/* BRDGFLUSH */
+#define        IFBF_FLUSHDYN           0x00    /* flush learned addresses only */
+#define        IFBF_FLUSHALL           0x01    /* flush all addresses */
+
+/* BRDGSFILT */
+#define IFBF_FILT_USEIPF       0x00000001 /* run pfil hooks on the
+                                             bridge interface */
+#define IFBF_FILT_MEMBER       0x00000002 /* run pfil hooks on the
+                                             member interfaces */
+#define IFBF_FILT_ONLYIP       0x00000004 /* only pass IP[46] packets
+                                             when pfil is enabled */
+#define IFBF_FILT_MASK         0x00000007 /* mask of valid values */
+
+
+/* APPLE MODIFICATION <jhw@apple.com>: Default is to pass non-IP packets. */
+#define        IFBF_FILT_DEFAULT       ( IFBF_FILT_USEIPF | IFBF_FILT_MEMBER )
+#if 0
+#define        IFBF_FILT_DEFAULT       (IFBF_FILT_USEIPF | \
+IFBF_FILT_MEMBER | \
+IFBF_FILT_ONLYIP)
+#endif
+
+/* STP port states */
+#define        BSTP_IFSTATE_DISABLED   0
+#define        BSTP_IFSTATE_LISTENING  1
+#define        BSTP_IFSTATE_LEARNING   2
+#define        BSTP_IFSTATE_FORWARDING 3
+#define        BSTP_IFSTATE_BLOCKING   4
+
+/*
+ * Interface list structure.
+ */
+
+#pragma pack(4)
+
+struct ifbifconf {
+       uint32_t        ifbic_len;      /* buffer size */
+       union {
+               caddr_t ifbicu_buf;
+               struct ifbreq *ifbicu_req;
+       } ifbic_ifbicu;
+#define        ifbic_buf       ifbic_ifbicu.ifbicu_buf
+#define        ifbic_req       ifbic_ifbicu.ifbicu_req
+};
+
+#ifdef KERNEL_PRIVATE
+struct ifbifconf32 {
+       uint32_t        ifbic_len;      /* buffer size */
+       union {
+               user32_addr_t   ifbicu_buf;
+               user32_addr_t   ifbicu_req;
+       } ifbic_ifbicu;
+};
+
+struct ifbifconf64 {
+       uint32_t        ifbic_len;      /* buffer size */
+       union {
+               user64_addr_t   ifbicu_buf;
+               user64_addr_t   ifbicu_req;
+       } ifbic_ifbicu;
+};
+#endif /* KERNEL_PRIVATE */
+
+#pragma pack()
+
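Reads use the same mechanism through SIOCGDRVSPEC; a hedged sketch of listing members with BRDGGIFS (same includes as the earlier sketch, plus <stdio.h> and <stdint.h>; the buffer size is an assumption):

    /* Hypothetical helper: print the member interfaces of "bridge0". */
    static int
    bridge_list_members(int s)
    {
            struct ifbreq reqs[16];         /* assumed large enough */
            struct ifbifconf bifc;
            struct ifdrv ifd;
            uint32_t i;

            memset(&bifc, 0, sizeof (bifc));
            bifc.ifbic_len = sizeof (reqs);
            bifc.ifbic_req = reqs;

            memset(&ifd, 0, sizeof (ifd));
            strlcpy(ifd.ifd_name, "bridge0", sizeof (ifd.ifd_name));
            ifd.ifd_cmd = BRDGGIFS;
            ifd.ifd_len = sizeof (bifc);
            ifd.ifd_data = &bifc;

            if (ioctl(s, SIOCGDRVSPEC, &ifd) != 0)
                    return (-1);

            /* Usual contract: ifbic_len comes back as bytes filled in. */
            for (i = 0; i < bifc.ifbic_len / sizeof (reqs[0]); i++)
                    printf("%s\n", reqs[i].ifbr_ifsname);
            return (0);
    }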
+/*
+ * Bridge address request.
+ */
+
+#pragma pack(4)
+
+struct ifbareq {
+       char            ifba_ifsname[IFNAMSIZ]; /* member if name */
+       unsigned long   ifba_expire;            /* address expire time */
+       uint8_t         ifba_flags;             /* address flags */
+       uint8_t         ifba_dst[ETHER_ADDR_LEN];/* destination address */
+};
+
+#ifdef KERNEL_PRIVATE
+struct ifbareq32 {
+       char            ifba_ifsname[IFNAMSIZ]; /* member if name */
+       uint32_t        ifba_expire;            /* address expire time */
+       uint8_t         ifba_flags;             /* address flags */
+       uint8_t         ifba_dst[ETHER_ADDR_LEN];/* destination address */
+};
+
+struct ifbareq64 {
+       char            ifba_ifsname[IFNAMSIZ]; /* member if name */
+       uint64_t        ifba_expire;            /* address expire time */
+       uint8_t         ifba_flags;             /* address flags */
+       uint8_t         ifba_dst[ETHER_ADDR_LEN];/* destination address */
+};
+#endif /* KERNEL_PRIVATE */
+
+#pragma pack()
+
+#define        IFBAF_TYPEMASK  0x03    /* address type mask */
+#define        IFBAF_DYNAMIC   0x00    /* dynamically learned address */
+#define        IFBAF_STATIC    0x01    /* static address */
+
+#define        IFBAFBITS       "\020\1STATIC"
+
+/*
+ * Address list structure.
+ */
+
+#pragma pack(4)
+
+struct ifbaconf {
+       uint32_t        ifbac_len;      /* buffer size */
+       union {
+               caddr_t ifbacu_buf;
+               struct ifbareq *ifbacu_req;
+       } ifbac_ifbacu;
+#define        ifbac_buf       ifbac_ifbacu.ifbacu_buf
+#define        ifbac_req       ifbac_ifbacu.ifbacu_req
+};
+
+#ifdef KERNEL_PRIVATE
+struct ifbaconf32 {
+       uint32_t        ifbac_len;      /* buffer size */
+       union {
+               user32_addr_t   ifbacu_buf;
+               user32_addr_t   ifbacu_req;
+       } ifbac_ifbacu;
+};
+
+struct ifbaconf64 {
+       uint32_t        ifbac_len;      /* buffer size */
+       union {
+               user64_addr_t   ifbacu_buf;
+               user64_addr_t   ifbacu_req;
+       } ifbac_ifbacu;
+};
+#endif /* KERNEL_PRIVATE */
+
+#pragma pack()
+
+/*
+ * Bridge parameter structure.
+ */
+
+#pragma pack(4)
+
+struct ifbrparam {
+       union {
+               uint32_t ifbrpu_int32;
+               uint16_t ifbrpu_int16;
+               uint8_t ifbrpu_int8;
+       } ifbrp_ifbrpu;
+};
+
+#pragma pack()
+
+#define        ifbrp_csize     ifbrp_ifbrpu.ifbrpu_int32       /* cache size */
+#define        ifbrp_ctime     ifbrp_ifbrpu.ifbrpu_int32       /* cache time (sec) */
+#define        ifbrp_prio      ifbrp_ifbrpu.ifbrpu_int16       /* bridge priority */
+#define        ifbrp_hellotime ifbrp_ifbrpu.ifbrpu_int8        /* hello time (sec) */
+#define        ifbrp_fwddelay  ifbrp_ifbrpu.ifbrpu_int8        /* fwd time (sec) */
+#define        ifbrp_maxage    ifbrp_ifbrpu.ifbrpu_int8        /* max age (sec) */
+#define        ifbrp_filter    ifbrp_ifbrpu.ifbrpu_int32       /* filtering flags */
+
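Scalar parameters ride the same ifdrv pattern with an ifbrparam payload; continuing the earlier sketches (same ifd and socket s), a hedged fragment setting the address-cache timeout:

    struct ifbrparam param;

    memset(&param, 0, sizeof (param));
    param.ifbrp_ctime = 1200;               /* seconds; illustrative value */

    ifd.ifd_cmd = BRDGSTO;
    ifd.ifd_len = sizeof (param);
    ifd.ifd_data = &param;
    (void) ioctl(s, SIOCSDRVSPEC, &ifd);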
+#ifdef KERNEL
+/*
+ * Timekeeping structure used in spanning tree code.
+ */
+struct bridge_timer {
+       uint16_t        active;
+       uint16_t        value;
+};
+
+struct bstp_config_unit {
+       uint64_t        cu_rootid;
+       uint64_t        cu_bridge_id;
+       uint32_t        cu_root_path_cost;
+       uint16_t        cu_message_age;
+       uint16_t        cu_max_age;
+       uint16_t        cu_hello_time;
+       uint16_t        cu_forward_delay;
+       uint16_t        cu_port_id;
+       uint8_t         cu_message_type;
+       uint8_t         cu_topology_change_acknowledgment;
+       uint8_t         cu_topology_change;
+};
+
+struct bstp_tcn_unit {
+       uint8_t         tu_message_type;
+};
+
+struct bridge_softc;
+
+/*
+ * Bridge interface list entry.
+ * (VL) bridge_ifmember would be a more descriptive name
+ */
+struct bridge_iflist {
+       LIST_ENTRY(bridge_iflist) bif_next;
+       uint64_t                bif_designated_root;
+       uint64_t                bif_designated_bridge;
+       uint32_t                bif_path_cost;
+       uint32_t                bif_designated_cost;
+       struct bridge_timer     bif_hold_timer;
+       struct bridge_timer     bif_message_age_timer;
+       struct bridge_timer     bif_forward_delay_timer;
+       uint16_t                bif_port_id;
+       uint16_t                bif_designated_port;
+       struct bstp_config_unit bif_config_bpdu;
+       uint8_t                 bif_state;
+       uint8_t                 bif_topology_change_acknowledge;
+       uint8_t                 bif_config_pending;
+       uint8_t                 bif_change_detection_enabled;
+       uint8_t                 bif_priority;
+       struct ifnet    *bif_ifp;       /* member if */
+       uint32_t                bif_flags;      /* member if flags */
+       int                             bif_mutecap;    /* member muted caps */
+       interface_filter_t      bif_iff_ref;
+       struct bridge_softc *bif_sc;
+};
+
+/*
+ * Bridge route node.
+ */
+struct bridge_rtnode {
+       LIST_ENTRY(bridge_rtnode) brt_hash;     /* hash table linkage */
+       LIST_ENTRY(bridge_rtnode) brt_list;     /* list linkage */
+       struct ifnet            *brt_ifp;       /* destination if */
+       unsigned long           brt_expire;     /* expiration time */
+       uint8_t                 brt_flags;      /* address flags */
+       uint8_t                 brt_addr[ETHER_ADDR_LEN];
+       /* APPLE MODIFICATION <cbz@apple.com> - add the following elements:
+     brt_flags_ext, brt_ifp_proxysta */
+#define IFBAF_EXT_PROXYSTA  0x01
+       uint8_t                 brt_flags_ext;  /* extended flags */
+       struct ifnet    *brt_ifp_proxysta;      /* proxy sta if */
+};
+
+
+/*
+ * Software state for each bridge.
+ */
+struct bridge_softc {
+       LIST_ENTRY(bridge_softc) sc_list;
+       struct ifnet    *sc_if;
+       uint64_t                sc_designated_root;
+       uint64_t                sc_bridge_id;
+       struct bridge_iflist    *sc_root_port;
+       uint32_t                sc_root_path_cost;
+       uint16_t                sc_max_age;
+       uint16_t                sc_hello_time;
+       uint16_t                sc_forward_delay;
+       uint16_t                sc_bridge_max_age;
+       uint16_t                sc_bridge_hello_time;
+       uint16_t                sc_bridge_forward_delay;
+       uint16_t                sc_topology_change_time;
+       uint16_t                sc_hold_time;
+       uint16_t                sc_bridge_priority;
+       uint8_t                 sc_topology_change_detected;
+       uint8_t                 sc_topology_change;
+       struct bridge_timer     sc_hello_timer;
+       struct bridge_timer     sc_topology_change_timer;
+       struct bridge_timer     sc_tcn_timer;
+       uint32_t                sc_brtmax;      /* max # of addresses */
+       uint32_t                sc_brtcnt;      /* cur. # of addresses */
+       /* APPLE MODIFICATION <cbz@apple.com> - add the following elements:
+     sc_brtmax_proxysta */
+       uint32_t                sc_brtmax_proxysta;     /* max # of proxy sta addresses */
+       uint32_t                sc_brttimeout;  /* rt timeout in seconds */
+       LIST_HEAD(, bridge_iflist) sc_iflist;   /* member interface list */
+       LIST_HEAD(, bridge_rtnode) *sc_rthash;  /* our forwarding table */
+       LIST_HEAD(, bridge_rtnode) sc_rtlist;   /* list version of above */
+       uint32_t                sc_rthash_key;  /* key for hash */
+       uint32_t                sc_filter_flags; /* ipf and flags */
+
+       //(VL)
+       char                    sc_if_xname[IFNAMSIZ];
+       bpf_packet_func         sc_bpf_input;
+       bpf_packet_func         sc_bpf_output;
+       u_int32_t               sc_flags;
+       lck_mtx_t               *sc_mtx;
+};
+
+#define SCF_DETACHING 0x1
+
+extern const uint8_t bstp_etheraddr[];
+
+int    bridgeattach(int);
+void   bridge_enqueue(struct bridge_softc *, struct ifnet *, struct mbuf *);
+void   bridge_rtdelete(struct bridge_softc *, struct ifnet *, int);
+
+void   bstp_initialization(struct bridge_softc *);
+void   bstp_stop(struct bridge_softc *);
+struct mbuf *bstp_input(struct bridge_softc *, struct ifnet *, struct mbuf *);
+
+
+#endif /* KERNEL */
+#endif /* PRIVATE */
+#endif /* !_NET_IF_BRIDGEVAR_H_ */
+
diff --git a/bsd/net/if_ethersubr.c b/bsd/net/if_ethersubr.c
index 8d82c530d45bbd33ce4eff5c8229f1b0426cd048..e407e009fa8fe6e3d498bc026affbd3cef26aa88 100644
--- a/bsd/net/if_ethersubr.c
+++ b/bsd/net/if_ethersubr.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000, 2009 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 extern struct ifqueue pkintrq;
 #endif
 
-#if BRIDGE
-#include <net/bridge.h>
-#endif
-
 /* #include "vlan.h" */
 #if NVLAN > 0
 #include <net/if_vlan_var.h>
diff --git a/bsd/net/if_llc.h b/bsd/net/if_llc.h
index 7b0d446e2d92c2ece31bfa5b4230fee59f9fa863..dade70621211095fa54b709140b9358590422a85 100644
--- a/bsd/net/if_llc.h
+++ b/bsd/net/if_llc.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000,2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *     This product includes software developed by the University of
- *     California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
@@ -57,7 +53,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- *      @(#)if_llc.h   8.1 (Berkeley) 6/10/93
+ *     @(#)if_llc.h    8.1 (Berkeley) 6/10/93
  */
 
 #ifndef _NET_IF_LLC_H_
  */
 
 struct llc {
-       u_char  llc_dsap;
-       u_char  llc_ssap;
+       u_int8_t llc_dsap;
+       u_int8_t llc_ssap;
        union {
            struct {
-               u_char control;
-               u_char format_id;
-               u_char class_id;
-               u_char window_x2;
+               u_int8_t control;
+               u_int8_t format_id;
+               u_int8_t class_id;
+               u_int8_t window_x2;
            } type_u;
            struct {
-               u_char num_snd_x2;
-               u_char num_rcv_x2;
+               u_int8_t num_snd_x2;
+               u_int8_t num_rcv_x2;
            } type_i;
            struct {
-               u_char control;
-               u_char num_rcv_x2;
+               u_int8_t control;
+               u_int8_t num_rcv_x2;
            } type_s;
            struct {
-               u_char control;
-               struct frmrinfo {
-                       u_char rej_pdu_0;
-                       u_char rej_pdu_1;
-                       u_char frmr_control;
-                       u_char frmr_control_ext;
-                       u_char frmr_cause;
-               } frmrinfo;
+               u_int8_t control;
+               /*
+                * We cannot put the following fields in a structure because
+                * the structure rounding might cause padding.
+                */
+               u_int8_t frmr_rej_pdu0;
+               u_int8_t frmr_rej_pdu1;
+               u_int8_t frmr_control;
+               u_int8_t frmr_control_ext;
+               u_int8_t frmr_cause;
            } type_frmr;
            struct {
-               u_char control;
-               u_char org_code[3];
-               u_short ether_type;
-           } type_snap;
+               u_int8_t  control;
+               u_int8_t  org_code[3];
+               u_int16_t ether_type;
+           } type_snap __attribute__((__packed__));
            struct {
-               u_char control;
-               u_char control_ext;
+               u_int8_t control;
+               u_int8_t control_ext;
            } type_raw;
        } llc_un;
-};
-#define llc_control            llc_un.type_u.control
-#define        llc_control_ext        llc_un.type_raw.control_ext
-#define llc_fid                llc_un.type_u.format_id
-#define llc_class              llc_un.type_u.class_id
-#define llc_window             llc_un.type_u.window_x2
-#define llc_frmrinfo           llc_un.type_frmr.frmrinfo
-#define llc_frmr_pdu0          llc_un.type_frmr.frmrinfo.rej_pdu0
-#define llc_frmr_pdu1          llc_un.type_frmr.frmrinfo.rej_pdu1
-#define llc_frmr_control       llc_un.type_frmr.frmrinfo.frmr_control
-#define llc_frmr_control_ext   llc_un.type_frmr.frmrinfo.frmr_control_ext
-#define llc_frmr_cause         llc_un.type_frmr.frmrinfo.frmr_control_ext
+} __attribute__((__packed__));
+
+struct frmrinfo {
+       u_int8_t frmr_rej_pdu0;
+       u_int8_t frmr_rej_pdu1;
+       u_int8_t frmr_control;
+       u_int8_t frmr_control_ext;
+       u_int8_t frmr_cause;
+} __attribute__((__packed__));
+
+#define        llc_control             llc_un.type_u.control
+#define        llc_control_ext         llc_un.type_raw.control_ext
+#define        llc_fid                 llc_un.type_u.format_id
+#define        llc_class               llc_un.type_u.class_id
+#define        llc_window              llc_un.type_u.window_x2
+#define        llc_frmrinfo            llc_un.type_frmr.frmr_rej_pdu0
+#define        llc_frmr_pdu0           llc_un.type_frmr.frmr_rej_pdu0
+#define        llc_frmr_pdu1           llc_un.type_frmr.frmr_rej_pdu1
+#define        llc_frmr_control        llc_un.type_frmr.frmr_control
+#define        llc_frmr_control_ext    llc_un.type_frmr.frmr_control_ext
+#define        llc_frmr_cause          llc_un.type_frmr.frmr_cause
+#define        llc_snap                llc_un.type_snap
 
 /*
  * Don't use sizeof(struct llc_un) for LLC header sizes
@@ -129,6 +137,7 @@ struct llc {
 #define LLC_ISFRAMELEN 4
 #define LLC_UFRAMELEN  3
 #define LLC_FRMRLEN    7
+#define LLC_SNAPFRAMELEN 8
 
 /*
  * Unnumbered LLC format commands
@@ -165,8 +174,22 @@ struct llc {
 /*
  * ISO PDTR 10178 contains among others
  */
+#define        LLC_8021D_LSAP  0x42
 #define LLC_X25_LSAP   0x7e
 #define LLC_SNAP_LSAP  0xaa
 #define LLC_ISO_LSAP   0xfe
 
-#endif
+/*
+ * LLC XID definitions from 802.2, as needed
+ */
+
+#define LLC_XID_FORMAT_BASIC   0x81
+#define LLC_XID_BASIC_MINLEN   (LLC_UFRAMELEN + 3)
+
+#define LLC_XID_CLASS_I        0x1
+#define LLC_XID_CLASS_II       0x3
+#define LLC_XID_CLASS_III      0x5
+#define LLC_XID_CLASS_IV       0x7
+
+
+#endif /* !_NET_IF_LLC_H_ */
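A hedged sketch of consuming the reworked header when parsing an 802.2 LLC/SNAP encapsulation; LLC_UI (the unnumbered-information control byte) is defined in a part of this header not shown in the hunk, and ntohs() is assumed available from the build environment:

    #include <sys/types.h>
    #include <net/if_llc.h>

    /* Hypothetical parser: return the SNAP ethertype in host order,
     * or 0 if the buffer does not hold a UI LLC/SNAP header. */
    static u_int16_t
    llc_snap_ethertype(const struct llc *l, size_t len)
    {
            if (len < LLC_SNAPFRAMELEN)
                    return (0);
            if (l->llc_dsap != LLC_SNAP_LSAP || l->llc_ssap != LLC_SNAP_LSAP)
                    return (0);
            if (l->llc_snap.control != LLC_UI)
                    return (0);
            return (ntohs(l->llc_snap.ether_type));
    }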
diff --git a/bsd/net/if_types.h b/bsd/net/if_types.h
index a8b580130dc755899a07d154ee7e7ca02e2c84a7..4eced169bd24793b197a0c18fbaa01849731dcb3 100644
--- a/bsd/net/if_types.h
+++ b/bsd/net/if_types.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #define        IFT_L2VLAN      0x87            /* Layer 2 Virtual LAN using 802.1Q */
 #define IFT_IEEE8023ADLAG 0x88         /* IEEE802.3ad Link Aggregate */
 #define        IFT_IEEE1394    0x90            /* IEEE1394 High Performance SerialBus*/
+#define IFT_BRIDGE     0xd1            /* Transparent bridge interface */
 
 /*
  * These are not based on IANA assignments:
diff --git a/bsd/net/if_var.h b/bsd/net/if_var.h
index 67d52d0a22ca1456231e3e911c51d5994001d5f6..0601b78729a0384523eb482db6eaba8261a78b14 100644
--- a/bsd/net/if_var.h
+++ b/bsd/net/if_var.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -487,6 +487,7 @@ struct ifnet {
        void            *if_fwd_route_lock;
 #endif
        struct route    if_fwd_route;   /* cached IPv4 forwarding route */
+       void    *if_bridge;             /* bridge glue */
 };
 
 #ifndef __APPLE__
diff --git a/bsd/net/if_vlan.c b/bsd/net/if_vlan.c
index e1be1efd0326aeb7483b7e89176884f3e2e202fb..8ebcfb84189bd7e5d08f00482dfc41fed2f2c50a 100644
--- a/bsd/net/if_vlan.c
+++ b/bsd/net/if_vlan.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2008 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2003-2009 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <net/multicast_list.h>
 #include <net/ether_if_module.h>
 
-#define        IF_MAXUNIT              0x7fff  /* historical value */
-
 #define VLANNAME       "vlan"
 
 typedef int (bpf_callback_func)(struct ifnet *, struct mbuf *);
diff --git a/bsd/net/pf.c b/bsd/net/pf.c
index cbc32f35dd242ed8e472dc27e33deac951a3fc61..5529d8056cf39b798a45afdf5d5e99e7a60cf246 100644
--- a/bsd/net/pf.c
+++ b/bsd/net/pf.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -26,7 +26,7 @@
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 
-/*     $apfw: pf.c,v 1.37 2008/12/05 23:10:20 jhw Exp $ */
+/*     $apfw: git commit 7c8016ea91f7b68950cf41729c92dd8e3e423ba7 $ */
 /*     $OpenBSD: pf.c,v 1.567 2008/02/20 23:40:13 henning Exp $ */
 
 /*
@@ -272,7 +272,7 @@ static int           pf_test_state_tcp(struct pf_state **, int,
                            void *, struct pf_pdesc *, u_short *);
 static int              pf_test_state_udp(struct pf_state **, int,
                            struct pfi_kif *, struct mbuf *, int,
-                           void *, struct pf_pdesc *);
+                           void *, struct pf_pdesc *, u_short *);
 static int              pf_test_state_icmp(struct pf_state **, int,
                            struct pfi_kif *, struct mbuf *, int,
                            void *, struct pf_pdesc *, u_short *);
@@ -469,22 +469,32 @@ pf_state_lookup_aux(struct pf_state **state, struct pfi_kif *kif,
 #define BOUND_IFACE(r, k) \
        ((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all
 
-#define STATE_INC_COUNTERS(s)                          \
-       do {                                            \
-               s->rule.ptr->states++;                  \
-               if (s->anchor.ptr != NULL)              \
-                       s->anchor.ptr->states++;        \
-               if (s->nat_rule.ptr != NULL)            \
-                       s->nat_rule.ptr->states++;      \
+#define STATE_INC_COUNTERS(s)                                  \
+       do {                                                    \
+               s->rule.ptr->states++;                          \
+               VERIFY(s->rule.ptr->states != 0);               \
+               if (s->anchor.ptr != NULL) {                    \
+                       s->anchor.ptr->states++;                \
+                       VERIFY(s->anchor.ptr->states != 0);     \
+               }                                               \
+               if (s->nat_rule.ptr != NULL) {                  \
+                       s->nat_rule.ptr->states++;              \
+                       VERIFY(s->nat_rule.ptr->states != 0);   \
+               }                                               \
        } while (0)
 
-#define STATE_DEC_COUNTERS(s)                          \
-       do {                                            \
-               if (s->nat_rule.ptr != NULL)            \
-                       s->nat_rule.ptr->states--;      \
-               if (s->anchor.ptr != NULL)              \
-                       s->anchor.ptr->states--;        \
-               s->rule.ptr->states--;                  \
+#define STATE_DEC_COUNTERS(s)                                  \
+       do {                                                    \
+               if (s->nat_rule.ptr != NULL) {                  \
+                       VERIFY(s->nat_rule.ptr->states > 0);    \
+                       s->nat_rule.ptr->states--;              \
+               }                                               \
+               if (s->anchor.ptr != NULL) {                    \
+                       VERIFY(s->anchor.ptr->states > 0);      \
+                       s->anchor.ptr->states--;                \
+               }                                               \
+               VERIFY(s->rule.ptr->states > 0);                \
+               s->rule.ptr->states--;                          \
        } while (0)
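The VERIFY calls added to these macros assert at the point of mutation: after an increment the count must be nonzero (no wrap to zero), and before a decrement it must be positive (no underflow). A standalone illustration of the pattern, with assert() standing in for the kernel's VERIFY:

    #include <assert.h>

    static unsigned int states;

    static void
    state_ref(void)
    {
            states++;
            assert(states != 0);    /* an overflow would wrap to 0 */
    }

    static void
    state_unref(void)
    {
            assert(states > 0);     /* catch underflow before it happens */
            states--;
    }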
 
 static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
@@ -512,8 +522,8 @@ RB_GENERATE(pf_state_tree_id, pf_state,
 #define        PF_DT_SKIP_EXTGWY       0x02
 
 #ifndef NO_APPLE_EXTENSIONS
-static const u_int16_t PF_PPTP_PORT = htons(1723);
-static const u_int32_t PF_PPTP_MAGIC_NUMBER = htonl(0x1A2B3C4D);
+static const u_int16_t PF_PPTP_PORT = 1723;
+static const u_int32_t PF_PPTP_MAGIC_NUMBER = 0x1A2B3C4D;
 
 struct pf_pptp_hdr {
        u_int16_t       length;
@@ -762,7 +772,7 @@ struct pf_grev1_hdr {
        */
 };
 
-static const u_int16_t PF_IKE_PORT = htons(500);
+static const u_int16_t PF_IKE_PORT = 500;
 
 struct pf_ike_hdr {
        u_int64_t initiator_cookie, responder_cookie;
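PF_IKE_PORT, like PF_PPTP_PORT and PF_PPTP_MAGIC_NUMBER above, is now stored in host byte order, so each comparison site converts the on-the-wire field instead, as the later hunks do. A hedged restatement of the convention against the userland headers:

    #include <sys/types.h>
    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <netinet/udp.h>

    /* Hypothetical predicate: wire fields are big-endian, the constant
     * is host order, so convert exactly once at the comparison. */
    static int
    is_ike(const struct udphdr *uh)
    {
            return (ntohs(uh->uh_sport) == 500 /* PF_IKE_PORT */ &&
                ntohs(uh->uh_dport) == 500);
    }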
@@ -1351,6 +1361,7 @@ pf_src_connlimit(struct pf_state **state)
        int bad = 0;
 
        (*state)->src_node->conn++;
+       VERIFY((*state)->src_node->conn != 0);
        (*state)->src.tcp_est = 1;
        pf_add_threshold(&(*state)->src_node->conn_rate);
 
@@ -1612,6 +1623,7 @@ pf_insert_state(struct pfi_kif *kif, struct pf_state *s)
        TAILQ_INSERT_TAIL(&state_list, s, entry_list);
        pf_status.fcounters[FCNT_STATE_INSERT]++;
        pf_status.states++;
+       VERIFY(pf_status.states != 0);
        pfi_kif_ref(kif, PFI_KIF_REF_STATE);
 #if NPFSYNC
        pfsync_insert_state(s);
@@ -1751,8 +1763,11 @@ pf_src_tree_remove_state(struct pf_state *s)
        lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
 
        if (s->src_node != NULL) {
-               if (s->src.tcp_est)
+               if (s->src.tcp_est) {
+                       VERIFY(s->src_node->conn > 0);
                        --s->src_node->conn;
+               }
+               VERIFY(s->src_node->states > 0);
                if (--s->src_node->states <= 0) {
                        t = s->rule.ptr->timeout[PFTM_SRC_NODE];
                        if (!t)
@@ -1761,6 +1776,7 @@ pf_src_tree_remove_state(struct pf_state *s)
                }
        }
        if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
+               VERIFY(s->nat_src_node->states > 0);
                if (--s->nat_src_node->states <= 0) {
                        t = s->rule.ptr->timeout[PFTM_SRC_NODE];
                        if (!t)
@@ -1819,16 +1835,21 @@ pf_free_state(struct pf_state *cur)
                return;
 #endif
        VERIFY(cur->timeout == PFTM_UNLINKED);
+       VERIFY(cur->rule.ptr->states > 0);
        if (--cur->rule.ptr->states <= 0 &&
            cur->rule.ptr->src_nodes <= 0)
                pf_rm_rule(NULL, cur->rule.ptr);
-       if (cur->nat_rule.ptr != NULL)
+       if (cur->nat_rule.ptr != NULL) {
+               VERIFY(cur->nat_rule.ptr->states > 0);
                if (--cur->nat_rule.ptr->states <= 0 &&
                    cur->nat_rule.ptr->src_nodes <= 0)
                        pf_rm_rule(NULL, cur->nat_rule.ptr);
-       if (cur->anchor.ptr != NULL)
+       }
+       if (cur->anchor.ptr != NULL) {
+               VERIFY(cur->anchor.ptr->states > 0);
                if (--cur->anchor.ptr->states <= 0)
                        pf_rm_rule(NULL, cur->anchor.ptr);
+       }
        pf_normalize_tcp_cleanup(cur);
        pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE);
        TAILQ_REMOVE(&state_list, cur, entry_list);
@@ -1836,6 +1857,7 @@ pf_free_state(struct pf_state *cur)
                pf_tag_unref(cur->tag);
        pool_put(&pf_state_pl, cur);
        pf_status.fcounters[FCNT_STATE_REMOVALS]++;
+       VERIFY(pf_status.states > 0);
        pf_status.states--;
 }
 
@@ -3335,8 +3357,8 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
        unsigned int cut;
        sa_family_t af = pd->af;
        u_int8_t proto = pd->proto;
-       unsigned int low = ntohs(r->rpool.proxy_port[0]);
-       unsigned int high = ntohs(r->rpool.proxy_port[1]);
+       unsigned int low = r->rpool.proxy_port[0];
+       unsigned int high = r->rpool.proxy_port[1];
 #else
        u_int16_t               cut;
 #endif
@@ -3358,7 +3380,7 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
        if (proto == IPPROTO_UDP) {
 
                /*--- Never float IKE source port ---*/
-               if (sxport->port == PF_IKE_PORT) {
+               if (ntohs(sxport->port) == PF_IKE_PORT) {
                        nxport->port = sxport->port;
                        return (0);
                }
@@ -3387,9 +3409,30 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
                                return (0);
                        }
                }
+       } else if (proto == IPPROTO_TCP) {
+               struct pf_state* s;
+               /*
+                * APPLE MODIFICATION: <rdar://problem/6546358>
+                * Fix allows NAT to use a single binding for TCP sessions
+                * with the same source IP address and source port
+                */
+               TAILQ_FOREACH(s, &state_list, entry_list) {
+                       struct pf_state_key* sk = s->state_key;
+                       if (!sk)
+                               continue;
+                       if (s->nat_rule.ptr != r)
+                               continue;
+                       if (sk->proto != IPPROTO_TCP || sk->af != af)
+                               continue;
+                       if (sk->lan.xport.port != sxport->port)
+                               continue;
+                       if (!(PF_AEQ(&sk->lan.addr, saddr, af)))
+                               continue;
+                       nxport->port = sk->gwy.xport.port;
+                       return (0);
+               }
        }
 #endif
-
        do {
                key.af = af;
                key.proto = proto;
@@ -3411,7 +3454,6 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
 #else
                key.ext.port = dport;
 #endif
-
                /*
                 * port search; start random, step;
                 * similar 2 portloop in in_pcbbind
@@ -3577,8 +3619,8 @@ pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
                    src->neg, kif))
                        r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
                            PF_SKIP_DST_ADDR].ptr;
-               else if (!pf_match_xport(r->proto, r->proto_variant, &src->xport,
-                       sxport))
+               else if (!pf_match_xport(r->proto,
+                   r->proto_variant, &src->xport, sxport))
 #else
                else if (PF_MISMATCHAW(&src->addr, saddr, pd->af,
                    src->neg, kif))
@@ -3945,12 +3987,42 @@ pf_socket_lookup(int direction, struct pf_pdesc *pd)
        case AF_INET:
                inp = in_pcblookup_hash(pi, saddr->v4, sport, daddr->v4, dport,
                    0, NULL);
+#if INET6
+               if (inp == NULL) {
+                       struct in6_addr s6, d6;
+
+                       memset(&s6, 0, sizeof (s6));
+                       s6.s6_addr16[5] = htons(0xffff);
+                       memcpy(&s6.s6_addr32[3], &saddr->v4,
+                           sizeof (saddr->v4));
+
+                       memset(&d6, 0, sizeof (d6));
+                       d6.s6_addr16[5] = htons(0xffff);
+                       memcpy(&d6.s6_addr32[3], &daddr->v4,
+                           sizeof (daddr->v4));
+
+                       inp = in6_pcblookup_hash(pi, &s6, sport,
+                           &d6, dport, 0, NULL);
+                       if (inp == NULL) {
+                               inp = in_pcblookup_hash(pi, saddr->v4, sport,
+                                   daddr->v4, dport, INPLOOKUP_WILDCARD, NULL);
+                               if (inp == NULL) {
+                                       inp = in6_pcblookup_hash(pi, &s6, sport,
+                                           &d6, dport, INPLOOKUP_WILDCARD,
+                                           NULL);
+                                       if (inp == NULL)
+                                               return (-1);
+                               }
+                       }
+               }
+#else
                if (inp == NULL) {
                        inp = in_pcblookup_hash(pi, saddr->v4, sport,
                            daddr->v4, dport, INPLOOKUP_WILDCARD, NULL);
                        if (inp == NULL)
                                return (-1);
                }
+#endif /* !INET6 */
                break;
 #endif /* INET */
 #if INET6
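The new branch also consults IPv6 sockets bound to v4-mapped addresses (::ffff:a.b.c.d) before falling back to wildcard lookups. A minimal sketch of the mapping it constructs, equivalent to the s6_addr16[5]/s6_addr32[3] stores above:

    #include <string.h>
    #include <netinet/in.h>

    /* Illustrative only: build ::ffff:a.b.c.d from an IPv4 address. */
    static void
    in6_v4mapped(struct in6_addr *a6, const struct in_addr *a4)
    {
            memset(a6, 0, sizeof (*a6));
            a6->s6_addr[10] = 0xff;
            a6->s6_addr[11] = 0xff;
            memcpy(&a6->s6_addr[12], &a4->s_addr, sizeof (a4->s_addr));
    }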
@@ -4983,8 +5055,8 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
                        struct udphdr *uh = pd->hdr.udp;
                        size_t plen = m->m_pkthdr.len - off - sizeof (*uh);
 
-                       if (uh->uh_sport == PF_IKE_PORT &&
-                           uh->uh_dport == PF_IKE_PORT &&
+                       if (ntohs(uh->uh_sport) == PF_IKE_PORT &&
+                           ntohs(uh->uh_dport) == PF_IKE_PORT &&
                            plen >= PF_IKE_PACKET_MINSIZE) {
                                if (plen > PF_IKE_PACKET_MINSIZE)
                                        plen = PF_IKE_PACKET_MINSIZE;
@@ -5154,11 +5226,13 @@ cleanup:
                if (sn != NULL) {
                        s->src_node = sn;
                        s->src_node->states++;
+                       VERIFY(s->src_node->states != 0);
                }
                if (nsn != NULL) {
                        PF_ACPY(&nsn->raddr, &pd->naddr, af);
                        s->nat_src_node = nsn;
                        s->nat_src_node->states++;
+                       VERIFY(s->nat_src_node->states != 0);
                }
                if (pd->proto == IPPROTO_TCP) {
                        if ((pd->flags & PFDESC_TCP_NORM) &&
@@ -5195,8 +5269,8 @@ cleanup:
                sk->af = af;
 #ifndef NO_APPLE_EXTENSIONS
                if (pd->proto == IPPROTO_UDP) {
-                       if (pd->hdr.udp->uh_sport == PF_IKE_PORT &&
-                           pd->hdr.udp->uh_dport == PF_IKE_PORT) {
+                       if (ntohs(pd->hdr.udp->uh_sport) == PF_IKE_PORT &&
+                           ntohs(pd->hdr.udp->uh_dport) == PF_IKE_PORT) {
                                sk->proto_variant = PF_EXTFILTER_APD;
                        } else {
                                sk->proto_variant = nr ? nr->extfilter :
@@ -5323,7 +5397,8 @@ cleanup:
                                u_int16_t dport = (direction == PF_OUT) ?
                                    sk->ext.xport.port : sk->gwy.xport.port;
 
-                               if (nr != NULL && dport == PF_PPTP_PORT) {
+                               if (nr != NULL &&
+                                   ntohs(dport) == PF_PPTP_PORT) {
                                        struct pf_app_state *as;
 
                                        as = pool_get(&pf_app_state_pl,
@@ -5349,8 +5424,9 @@ cleanup:
                        case IPPROTO_UDP: {
                                struct udphdr *uh = pd->hdr.udp;
 
-                               if (nr != NULL && uh->uh_sport == PF_IKE_PORT &&
-                                   uh->uh_dport == PF_IKE_PORT) {
+                               if (nr != NULL &&
+                                   ntohs(uh->uh_sport) == PF_IKE_PORT &&
+                                   ntohs(uh->uh_dport) == PF_IKE_PORT) {
                                        struct pf_app_state *as;
 
                                        as = pool_get(&pf_app_state_pl,
@@ -5614,9 +5690,9 @@ pf_pptp_handler(struct pf_state *s, int direction, int off,
        as = &s->state_key->app_state->u.pptp;
        m_copydata(m, off, plen, &cm);
 
-       if (cm.hdr.magic != PF_PPTP_MAGIC_NUMBER)
+       if (ntohl(cm.hdr.magic) != PF_PPTP_MAGIC_NUMBER)
                return;
-       if (cm.hdr.type != htons(1))
+       if (ntohs(cm.hdr.type) != 1)
                return;
 
        sk = s->state_key;
@@ -5659,6 +5735,7 @@ pf_pptp_handler(struct pf_state *s, int direction, int off,
                gsk->gwy.xport.call_id = 0;
                gsk->ext.xport.call_id = 0;
 
+               STATE_INC_COUNTERS(gs);
                as->grev1_state = gs;
        } else {
                gsk = gs->state_key;
@@ -5816,8 +5893,12 @@ pf_pptp_handler(struct pf_state *s, int direction, int off,
                }
 
                m = pf_lazy_makewritable(pd, m, off + plen);
-               if (!m)
+               if (!m) {
+                       as->grev1_state = NULL;
+                       STATE_DEC_COUNTERS(gs);
+                       pool_put(&pf_state_pl, gs);
                        return;
+               }
                m_copyback(m, off, plen, &cm);
        }
 
@@ -5835,8 +5916,14 @@ pf_pptp_handler(struct pf_state *s, int direction, int off,
                gs->creation = pf_time_second();
                gs->expire = pf_time_second();
                gs->timeout = PFTM_GREv1_FIRST_PACKET;
-               if (gs->src_node) ++gs->src_node->states;
-               if (gs->nat_src_node) ++gs->nat_src_node->states;
+               if (gs->src_node != NULL) {
+                       ++gs->src_node->states;
+                       VERIFY(gs->src_node->states != 0);
+               }
+               if (gs->nat_src_node != NULL) {
+                       ++gs->nat_src_node->states;
+                       VERIFY(gs->nat_src_node->states != 0);
+               }
                pf_set_rt_ifp(gs, &sk->lan.addr);
                if (pf_insert_state(BOUND_IFACE(s->rule.ptr, kif), gs)) {
 
@@ -5851,7 +5938,8 @@ pf_pptp_handler(struct pf_state *s, int direction, int off,
                         * succeed.  Failures are expected to be rare enough
                         * that fixing this is a low priority.
                         */
-
+                       as->grev1_state = NULL;
+                       pd->lmw = -1;
                        pf_src_tree_remove_state(gs);
                        STATE_DEC_COUNTERS(gs);
                        pool_put(&pf_state_pl, gs);
@@ -6105,9 +6193,27 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
                                            >> sws;
                                        dws = dst->wscale & PF_WSCALE_MASK;
                                } else {
+#ifndef NO_APPLE_MODIFICATION
+                                       /*
+                                        * <rdar://5786370>
+                                        *
+                                        * Window scale negotiation has failed,
+                                        * therefore we must restore the window
+                                        * scale in the state record that we
+                                        * optimistically removed in
+                                        * pf_test_rule().  Care is required to
+                                        * prevent arithmetic overflow from
+                                        * zeroing the window when it's
+                                        * truncated down to 16-bits.   --jhw
+                                        */
+                                       u_int32_t _win = dst->max_win;
+                                       _win <<= dst->wscale & PF_WSCALE_MASK;
+                                       dst->max_win = MIN(0xffff, _win);
+#else
                                        /* fixup other window */
                                        dst->max_win <<= dst->wscale &
                                            PF_WSCALE_MASK;
+#endif
                                        /* in case of a retrans SYN|ACK */
                                        dst->wscale = 0;
                                }
@@ -6125,9 +6231,16 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
                 * the crappy stack check or if we picked up the connection
                 * after establishment)
                 */
+#ifndef NO_APPLE_MODIFICATIONS
+               if (src->seqhi == 1 ||
+                   SEQ_GEQ(end + MAX(1, (u_int32_t)dst->max_win << dws),
+                   src->seqhi))
+                       src->seqhi = end + MAX(1, (u_int32_t)dst->max_win << dws);
+#else
                if (src->seqhi == 1 ||
                    SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
                        src->seqhi = end + MAX(1, dst->max_win << dws);
+#endif
                if (win > src->max_win)
                        src->max_win = win;
 
@@ -6201,7 +6314,11 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
 #define MAXACKWINDOW (0xffff + 1500)   /* 1500 is an arbitrary fudge factor */
        if (SEQ_GEQ(src->seqhi, end) &&
            /* Last octet inside other's window space */
+#ifndef NO_APPLE_MODIFICATIONS
+           SEQ_GEQ(seq, src->seqlo - ((u_int32_t)dst->max_win << dws)) &&
+#else
            SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
+#endif
            /* Retrans: not more than one window back */
            (ackskew >= -MAXACKWINDOW) &&
            /* Acking not more than one reassembled fragment backwards */
@@ -6229,9 +6346,13 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
                if (SEQ_GT(end, src->seqlo))
                        src->seqlo = end;
                /* slide the window of what the other end can send */
+#ifndef NO_APPLE_MODIFICATIONS
+               if (SEQ_GEQ(ack + ((u_int32_t)win << sws), dst->seqhi))
+                       dst->seqhi = ack + MAX(((u_int32_t)win << sws), 1);
+#else
                if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
                        dst->seqhi = ack + MAX((win << sws), 1);
-
+#endif
 
                /* update states */
                if (th->th_flags & TH_SYN)
@@ -6331,8 +6452,13 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
                if (SEQ_GT(end, src->seqlo))
                        src->seqlo = end;
                /* slide the window of what the other end can send */
+#ifndef NO_APPLE_MODIFICATIONS
+               if (SEQ_GEQ(ack + ((u_int32_t)win << sws), dst->seqhi))
+                       dst->seqhi = ack + MAX(((u_int32_t)win << sws), 1);
+#else
                if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
                        dst->seqhi = ack + MAX((win << sws), 1);
+#endif
 
                /*
                 * Cannot set dst->seqhi here since this could be a shotgunned
@@ -6374,7 +6500,12 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
                            "fwd" : "rev");
                        printf("pf: State failure on: %c %c %c %c | %c %c\n",
                            SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
+#ifndef NO_APPLE_MODIFICATIONS
+                           SEQ_GEQ(seq,
+                           src->seqlo - ((u_int32_t)dst->max_win << dws)) ?
+#else
                            SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
+#endif
                            ' ': '2',
                            (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
                            (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
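Every Apple-modified comparison in the hunks above widens the 16-bit window to u_int32_t before shifting it by the negotiated window scale; the <rdar://5786370> comment explains why. A self-contained sketch of the restore case, assuming PF_WSCALE_MASK is the low nibble as in pf:

#include <stdint.h>

/*
 * Overflow-safe restore: max_win lives in a 16-bit field, so computing
 * 0x8000 << 1 in narrow arithmetic and storing it straight back would
 * truncate to 0 and collapse the window. Widen first, clamp last.
 */
static uint16_t
restore_window(uint16_t max_win, uint8_t wscale)
{
        uint32_t win = (uint32_t)max_win << (wscale & 0x0f);

        return (win > 0xffff) ? 0xffff : (uint16_t)win;
}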
@@ -6447,7 +6578,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
 
 static int
 pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
-    struct mbuf *m, int off, void *h, struct pf_pdesc *pd)
+    struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason)
 {
 #pragma unused(h)
        struct pf_state_peer    *src, *dst;
@@ -6487,7 +6618,8 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
        }
 
 #ifndef NO_APPLE_EXTENSIONS
-       if (uh->uh_sport == PF_IKE_PORT && uh->uh_dport == PF_IKE_PORT) {
+       if (ntohs(uh->uh_sport) == PF_IKE_PORT &&
+           ntohs(uh->uh_dport) == PF_IKE_PORT) {
                struct pf_ike_hdr ike;
                size_t plen = m->m_pkthdr.len - off - sizeof (*uh);
                if (plen < PF_IKE_PACKET_MINSIZE) {
@@ -6570,6 +6702,10 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
            (*state)->state_key->app_state->handler) {
                (*state)->state_key->app_state->handler(*state, direction,
                    off + uh->uh_ulen, pd, kif);
+               if (pd->lmw < 0) {
+                       REASON_SET(reason, PFRES_MEMORY);
+                       return (PF_DROP);
+               }
                m = pd->mp;
        }
 #endif
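The IKE checks in this file (here and in the ICMP path below) previously compared uh_sport/uh_dport, which are in network byte order, against the host-order constant PF_IKE_PORT, so they could only ever match on big-endian machines. A userland illustration, assuming PF_IKE_PORT is the standard IKE port 500:

#include <arpa/inet.h>
#include <stdint.h>

#define PF_IKE_PORT 500         /* assumed value: IKE's well-known port */

/*
 * On a little-endian machine the wire encoding of port 500 reads back
 * as 0xf401 when treated as a host integer, so the un-converted
 * comparison (port_net == PF_IKE_PORT) is never true there.
 */
static int
is_ike_port(uint16_t port_net)
{
        return ntohs(port_net) == PF_IKE_PORT;
}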
@@ -6968,7 +7104,12 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
                        }
 
                        if (!SEQ_GEQ(src->seqhi, seq) ||
+#ifndef NO_APPLE_MODIFICATION
+                           !SEQ_GEQ(seq,
+                           src->seqlo - ((u_int32_t)dst->max_win << dws))) {
+#else
                            !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws))) {
+#endif
                                if (pf_status.debug >= PF_DEBUG_MISC) {
                                        printf("pf: BAD ICMP %d:%d ",
                                            icmptype, pd->hdr.icmp->icmp_code);
@@ -7081,8 +7222,8 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
 #ifndef NO_APPLE_EXTENSIONS
                        key.proto_variant = PF_EXTFILTER_APD;
 
-                       if (uh.uh_sport == PF_IKE_PORT &&
-                           uh.uh_dport == PF_IKE_PORT) {
+                       if (ntohs(uh.uh_sport) == PF_IKE_PORT &&
+                           ntohs(uh.uh_dport) == PF_IKE_PORT) {
                                struct pf_ike_hdr ike;
                                size_t plen =
                                    m->m_pkthdr.len - off2 - sizeof (uh);
@@ -8330,8 +8471,6 @@ pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p,
                        h = mtod(m, struct ip *);               \
                }                                               \
        } while (0)
-#else
-#define PF_APPLE_UPDATE_PDESC_IPv4()
 #endif
 
 int
@@ -8439,9 +8578,13 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
                if ((th.th_flags & TH_ACK) && pd.p_len == 0)
                        pqid = 1;
                action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
-               if (action == PF_DROP)
+#ifndef NO_APPLE_EXTENSIONS
+               if (pd.lmw < 0)
                        goto done;
                PF_APPLE_UPDATE_PDESC_IPv4();
+#endif
+               if (action == PF_DROP)
+                       goto done;
                action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
                    &reason);
 #ifndef NO_APPLE_EXTENSIONS
@@ -8478,7 +8621,8 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
                        REASON_SET(&reason, PFRES_SHORT);
                        goto done;
                }
-               action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
+               action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd,
+                   &reason);
 #ifndef NO_APPLE_EXTENSIONS
                if (pd.lmw < 0)
                        goto done;
@@ -8614,7 +8758,10 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
        }
 
 done:
+#ifndef NO_APPLE_EXTENSIONS
+       *m0 = pd.mp;
        PF_APPLE_UPDATE_PDESC_IPv4();
+#endif
 
        if (action == PF_PASS && h->ip_hl > 5 &&
            !((s && s->allow_opts) || r->allow_opts)) {
@@ -8732,8 +8879,15 @@ done:
        }
 
 #ifndef NO_APPLE_EXTENSIONS
+       VERIFY(m == NULL || pd.mp == NULL || pd.mp == m);
+
        if (*m0) {
                if (pd.lmw < 0) {
+                       REASON_SET(&reason, PFRES_MEMORY);
+                       action = PF_DROP;
+               }
+
+               if (action == PF_DROP) {
                        m_freem(*m0);
                        *m0 = NULL;
                        return (PF_DROP);
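The done: bookkeeping above (mirrored for pf_test6() below) encodes the contract introduced by pf_lazy_makewritable(): pd.mp always tracks the current head mbuf, which may have been reallocated, and a negative pd.lmw records that the writable copy failed and must surface as a PFRES_MEMORY drop. A compact restatement with stand-in types:

/* Stand-in: PF_DROP_ mirrors pf's PF_DROP action code. */
#define PF_DROP_        1

struct mbuf;                            /* opaque for this sketch */
struct pdesc { struct mbuf *mp; int lmw; };

/*
 * Hand the (possibly replaced) mbuf chain back to the caller via *m0,
 * and convert a recorded lazy-makewritable failure into a drop.
 */
static int
pf_finish(struct pdesc *pd, struct mbuf **m0, int action)
{
        *m0 = pd->mp;
        if (pd->lmw < 0)
                action = PF_DROP_;
        return action;
}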
@@ -8766,8 +8920,6 @@ done:
                        h = mtod(m, struct ip6_hdr *);          \
                }                                               \
        } while (0)
-#else
-#define PF_APPLE_UPDATE_PDESC_IPv6()
 #endif
 
 int
@@ -8944,9 +9096,13 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
                }
                pd.p_len = pd.tot_len - off - (th.th_off << 2);
                action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
-               if (action == PF_DROP)
+#ifndef NO_APPLE_EXTENSIONS
+               if (pd.lmw < 0)
                        goto done;
                PF_APPLE_UPDATE_PDESC_IPv6();
+#endif
+               if (action == PF_DROP)
+                       goto done;
                action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
                    &reason);
 #ifndef NO_APPLE_EXTENSIONS
@@ -8983,7 +9139,8 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
                        REASON_SET(&reason, PFRES_SHORT);
                        goto done;
                }
-               action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
+               action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd,
+                   &reason);
 #ifndef NO_APPLE_EXTENSIONS
                if (pd.lmw < 0)
                        goto done;
@@ -9120,7 +9277,10 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
        }
 
 done:
+#ifndef NO_APPLE_EXTENSIONS
+       *m0 = pd.mp;
        PF_APPLE_UPDATE_PDESC_IPv6();
+#endif
 
        if (n != m) {
                m_freem(n);
@@ -9246,8 +9406,15 @@ done:
                pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd);
 #else
 #ifndef NO_APPLE_EXTENSIONS
+       VERIFY(m == NULL || pd.mp == NULL || pd.mp == m);
+
        if (*m0) {
                if (pd.lmw < 0) {
+                       REASON_SET(&reason, PFRES_MEMORY);
+                       action = PF_DROP;
+               }
+
+               if (action == PF_DROP) {
                        m_freem(*m0);
                        *m0 = NULL;
                        return (PF_DROP);
@@ -9411,6 +9578,15 @@ pf_time_second(void)
 {
        struct timeval t;
 
+       microuptime(&t);
+       return (t.tv_sec);
+}
+
+uint64_t
+pf_calendar_time_second(void)
+{
+       struct timeval t;
+
        microtime(&t);
        return (t.tv_sec);
 }
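Splitting the uptime-based pf_time_second() from the new pf_calendar_time_second() separates two jobs: state creation and expiry stamps need a monotonic clock that cannot jump when the wall clock is set, while the user-visible pf_status.since belongs in calendar time. A kernel-context sketch of the distinction (microuptime() and microtime() are the xnu primitives used above):

#include <sys/time.h>

/*
 * Kernel-context sketch: if timeouts used microtime(), stepping the
 * clock backward could keep stale states alive (or expire fresh ones)
 * en masse; microuptime() only ever moves forward.
 */
static void
sample_clocks(u_int64_t *uptime_sec, u_int64_t *calendar_sec)
{
        struct timeval t;

        microuptime(&t);        /* monotonic since boot: drives timeouts */
        *uptime_sec = t.tv_sec;

        microtime(&t);          /* wall clock: user-visible timestamps */
        *calendar_sec = t.tv_sec;
}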
index 8145fed94130e505e5ea0da31077824c506e1005..5b8461e375d0dd38cb5e601f79cecdf658ecea86 100644 (file)
@@ -1329,7 +1329,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                        error = ENOMEM;
                } else {
                        pf_status.running = 1;
-                       pf_status.since = pf_time_second();
+                       pf_status.since = pf_calendar_time_second();
                        if (pf_status.stateid == 0) {
                                pf_status.stateid = pf_time_second();
                                pf_status.stateid = pf_status.stateid << 32;
@@ -1348,7 +1348,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                        mbuf_growth_normal();
                        pf_detach_hooks();
                        pf_status.running = 0;
-                       pf_status.since = pf_time_second();
+                       pf_status.since = pf_calendar_time_second();
                        wakeup(pf_purge_thread_fn);
                        DPFPRINTF(PF_DEBUG_MISC, ("pf: stopped\n"));
                }
@@ -1922,6 +1922,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                        break;
                }
                pf_default_rule.states++;
+               VERIFY(pf_default_rule.states != 0);
                break;
        }
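This VERIFY, like the ones added to the src_node counters in pf.c above, asserts immediately after an unsigned increment, where reading zero can only mean wraparound. A userland equivalent, with assert() standing in for xnu's always-on VERIFY():

#include <assert.h>
#include <stdint.h>

/*
 * If *states was already UINT32_MAX the increment wraps to 0; failing
 * fast at the point of damage beats silently corrupting the rule's
 * state accounting.
 */
static void
states_inc(uint32_t *states)
{
        ++*states;
        assert(*states != 0);
}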
 
@@ -2007,7 +2008,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                bzero(pf_status.counters, sizeof (pf_status.counters));
                bzero(pf_status.fcounters, sizeof (pf_status.fcounters));
                bzero(pf_status.scounters, sizeof (pf_status.scounters));
-               pf_status.since = pf_time_second();
+               pf_status.since = pf_calendar_time_second();
                if (*pf_status.ifname)
                        pfi_update_status(pf_status.ifname, NULL);
                break;
index 60deece57d49b2a4aa5d7fd31268c9a896239639..b8bdb0034359296c60c644c524c019eaa52a38f4 100644 (file)
@@ -118,6 +118,7 @@ __private_extern__ void pool_sethardlimit(struct pool *, int,
 __private_extern__ void *pool_get(struct pool *, int);
 __private_extern__ void pool_put(struct pool *, void *);
 __private_extern__ u_int64_t pf_time_second(void);
+__private_extern__ u_int64_t pf_calendar_time_second(void);
 #endif /* KERNEL */
 
 union sockaddr_union {
index 937341b00a139fbc0d60355a646248c78b3196df..df3b53ba3008191928749e41b87bc339716ce902 100644 (file)
@@ -1208,6 +1208,8 @@ rtioctl(unsigned long req, caddr_t data, struct proc *p)
 #if INET && MROUTING
        return mrt_ioctl(req, data);
 #else
+#pragma unused(req)
+#pragma unused(data)
        return ENXIO;
 #endif
 }
index 6897e77ac87492b9b26765821083930e7db20a02..c553930f1b13fde46a8c263eac47dd2b8837f6f4 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -74,6 +74,7 @@
 #include <net/if_arp.h>
 #include <net/if_dl.h>
 #include <net/dlil.h>
+#include <net/if_types.h>
 #include <net/route.h>
 #include <netinet/if_ether.h>
 #include <netinet/in_var.h>
@@ -937,34 +938,64 @@ arp_ip_handle_input(
        struct llinfo_arp *llinfo;
        errno_t error;
        int created_announcement = 0;
-
+       int bridged = 0, is_bridge = 0;
+       
        /* Do not respond to requests for 0.0.0.0 */
        if (target_ip->sin_addr.s_addr == 0 && arpop == ARPOP_REQUEST)
                goto done;
+       
+       if (ifp->if_bridge)
+               bridged = 1;
+       if (ifp->if_type == IFT_BRIDGE)
+               is_bridge = 1;
 
        /*
         * Determine if this ARP is for us
+        * For a bridge, we want to check the address irrespective 
+        * of the receive interface.
         */
        lck_rw_lock_shared(in_ifaddr_rwlock);
        TAILQ_FOREACH(ia, INADDR_HASH(target_ip->sin_addr.s_addr), ia_hash) {
-               /* do_bridge should be tested here for bridging */
-               if (ia->ia_ifp == ifp &&
+               if (((bridged && ia->ia_ifp->if_bridge != NULL) ||
+                       (ia->ia_ifp == ifp)) &&
                    ia->ia_addr.sin_addr.s_addr == target_ip->sin_addr.s_addr) {
-                       best_ia = ia;
-                       ifaref(&best_ia->ia_ifa);
-                       lck_rw_done(in_ifaddr_rwlock);
-                       goto match;
+                               best_ia = ia;
+                               ifaref(&best_ia->ia_ifa);
+                               lck_rw_done(in_ifaddr_rwlock);
+                               goto match;
                }
        }
 
        TAILQ_FOREACH(ia, INADDR_HASH(sender_ip->sin_addr.s_addr), ia_hash) {
-               /* do_bridge should be tested here for bridging */
-               if (ia->ia_ifp == ifp &&
+               if (((bridged && ia->ia_ifp->if_bridge != NULL) ||
+                       (ia->ia_ifp == ifp)) &&
                    ia->ia_addr.sin_addr.s_addr == sender_ip->sin_addr.s_addr) {
-                       best_ia = ia;
-                       ifaref(&best_ia->ia_ifa);
-                       lck_rw_done(in_ifaddr_rwlock);
-                       goto match;
+                               best_ia = ia;
+                               ifaref(&best_ia->ia_ifa);
+                               lck_rw_done(in_ifaddr_rwlock);
+                               goto match;
+               }
+       }
+
+#define BDG_MEMBER_MATCHES_ARP(addr, ifp, ia)                                                          \
+       (ia->ia_ifp->if_bridge == ifp->if_softc &&                                                              \
+       !bcmp(ifnet_lladdr(ia->ia_ifp), ifnet_lladdr(ifp), ifp->if_addrlen) &&  \
+       addr == ia->ia_addr.sin_addr.s_addr)
+       /*
+        * Handle the case where the bridge shares its MAC address with
+        * some of its children: packets are claimed by the bridge
+        * itself (bridge_input() does it first), but they are really
+        * destined for the bridge member.
+        */
+       if (is_bridge) {
+               TAILQ_FOREACH(ia, INADDR_HASH(target_ip->sin_addr.s_addr), ia_hash) {
+                       if (BDG_MEMBER_MATCHES_ARP(target_ip->sin_addr.s_addr, ifp, ia)) {
+                               ifp = ia->ia_ifp;
+                               best_ia = ia;
+                               ifaref(&best_ia->ia_ifa);
+                               lck_rw_done(in_ifaddr_rwlock);
+                               goto match;
+                       }
                }
        }
        lck_rw_done(in_ifaddr_rwlock);
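BDG_MEMBER_MATCHES_ARP above handles frames the bridge swallowed in bridge_input() because it shares a MAC address with a member; ARP must then be answered on behalf of the member that actually owns the target address. A simplified restatement over flattened, hypothetical types:

#include <stdint.h>
#include <string.h>

/* Flattened, illustrative stand-ins for ifnet/in_ifaddr fields. */
struct br_port {
        void    *bridge_softc;  /* which bridge this member belongs to */
        uint8_t  lladdr[6];     /* link-layer (MAC) address */
        uint32_t ipv4;          /* member's configured IPv4 address */
};

/*
 * Match when the member is on this bridge, shares the bridge's MAC
 * (so the bridge claimed the frame first), and owns the ARP target.
 */
static int
bdg_member_matches_arp(uint32_t target, const struct br_port *bridge,
    const struct br_port *member)
{
        return member->bridge_softc == bridge->bridge_softc &&
            memcmp(member->lladdr, bridge->lladdr, 6) == 0 &&
            target == member->ipv4;
}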
@@ -980,12 +1011,16 @@ arp_ip_handle_input(
                        continue;
                best_ia = (struct in_ifaddr *)ifa;
                ifaref(&best_ia->ia_ifa);
-               break;
+               ifnet_lock_done(ifp);
+               goto match;
        }
        ifnet_lock_done(ifp);
 
-       /* If we don't have an IP address on this interface, ignore the packet */
-       if (best_ia == NULL)
+       /*
+        * If we're not a bridge member, or if we are but there's no
+        * IPv4 address to use for the interface, drop the packet.
+        */
+       if (!bridged || best_ia == NULL)
                goto done;
 
 match:
@@ -995,7 +1030,7 @@ match:
        }
 
        /* Check for a conflict */
-       if (sender_ip->sin_addr.s_addr == best_ia->ia_addr.sin_addr.s_addr) {
+       if (!bridged && sender_ip->sin_addr.s_addr == best_ia->ia_addr.sin_addr.s_addr) {
                struct kev_msg        ev_msg;
                struct kev_in_collision *in_collision;
                u_char  storage[sizeof(struct kev_in_collision) + MAX_HW_LEN];
@@ -1152,7 +1187,7 @@ match:
 
        RT_LOCK_ASSERT_HELD(route);
        gateway = SDL(route->rt_gateway);
-       if (route->rt_ifp != ifp) {
+       if (!bridged && route->rt_ifp != ifp) {
                if (!IN_LINKLOCAL(ntohl(sender_ip->sin_addr.s_addr)) || (ifp->if_eflags & IFEF_ARPLL) == 0) {
                        if (log_arp_warnings)
                                log(LOG_ERR, "arp: %s is on %s%d but got reply from %s on %s%d\n",
@@ -1286,6 +1321,19 @@ respond:
 
                if (error == 0) {
                        RT_LOCK_ASSERT_HELD(route);
+                       /*
+                        * Return proxied ARP replies only on the interface
+                        * or bridge cluster where this network resides.
+                        * Otherwise we may conflict with the host we are
+                        * proxying for.
+                        */
+                       if (route->rt_ifp != ifp &&
+                               (route->rt_ifp->if_bridge != ifp->if_bridge ||
+                                ifp->if_bridge == NULL)) {
+                                       RT_REMREF_LOCKED(route);
+                                       RT_UNLOCK(route);
+                                       goto done;
+                               }
                        proxied = *SDL(route->rt_gateway);
                        target_hw = &proxied;
                } else {
index 090c692bc6830f886c6898fed644d1a12674480c..54fceaef493e98cac7655b5197047abd5cd225b3 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <netinet/ip_dummynet.h>
 #include <netinet/ip_var.h>
 
-#if BRIDGE
-#include <netinet/if_ether.h> /* for struct arpcom */
-#include <net/bridge.h>
-#endif
-
 /*
  * We keep a private variable for the simulation time, but we could
  * probably use an existing one ("softticks" in sys/kern/kern_timer.c)
@@ -1155,28 +1150,6 @@ dummynet_send(struct mbuf *m)
                        proto_inject(PF_INET, m);
                        break ;
        
-#if BRIDGE
-               case DN_TO_BDG_FWD :
-                       /*
-                        * The bridge requires/assumes the Ethernet header is
-                        * contiguous in the first mbuf header.  Insure this is true.
-                        */
-                       if (BDG_LOADED) {
-                       if (m->m_len < ETHER_HDR_LEN &&
-                               (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) {
-                               printf("dummynet/bridge: pullup fail, dropping pkt\n");
-                               break;
-                       }
-                       m = bdg_forward_ptr(m, pkt->ifp);
-                       } else {
-                       /* somebody unloaded the bridge module. Drop pkt */
-                       /* XXX rate limit */
-                       printf("dummynet: dropping bridged packet trapped in pipe\n");
-                       }
-                       if (m)
-                       m_freem(m);
-                       break;
-#endif         
                default:
                        printf("dummynet: bad switch %d!\n", pkt->dn_dir);
                        m_freem(m);
index eaf005f60cdd87b7532d5731f648a7920e72babd..a989e64e33da6224275720ec668870ec9deeac80 100644 (file)
@@ -2633,7 +2633,6 @@ ip_setmoptions(sopt, imop)
        struct ip_moptions **imop;
 {
        int error = 0;
-       int i;
        struct in_addr addr;
        struct ip_mreq mreq;
        struct ifnet *ifp = NULL;
@@ -2654,20 +2653,23 @@ ip_setmoptions(sopt, imop)
        switch (sopt->sopt_name) {
        /* store an index number for the vif you wanna use in the send */
 #if MROUTING
-       case IP_MULTICAST_VIF:
-               if (legal_vif_num == 0) {
-                       error = EOPNOTSUPP;
-                       break;
-               }
-               error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
-               if (error)
-                       break;
-               if (!legal_vif_num(i) && (i != -1)) {
-                       error = EINVAL;
+       case IP_MULTICAST_VIF: 
+               {
+                       int i;
+                       if (legal_vif_num == 0) {
+                               error = EOPNOTSUPP;
+                               break;
+                       }
+                       error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
+                       if (error)
+                               break;
+                       if (!legal_vif_num(i) && (i != -1)) {
+                               error = EINVAL;
+                               break;
+                       }
+                       imo->imo_multicast_vif = i;
                        break;
                }
-               imo->imo_multicast_vif = i;
-               break;
 #endif /* MROUTING */
 
        case IP_MULTICAST_IF:
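The IP_MULTICAST_VIF rewrite above is structural, not behavioral: i was only used by this one case, and the function-scope declaration became an unused-variable warning once MROUTING was compiled out. Braces give the case body its own scope, so the declaration disappears together with the case:

/*
 * Minimal illustration: a declaration inside a switch case needs an
 * enclosing block, and keeping it there means it is compiled out
 * whenever the case itself is.
 */
static int
handle_option(int opt)
{
        switch (opt) {
        case 1: {               /* braces open a scope for i */
                int i = 42;
                return i;
        }
        default:
                return -1;
        }
}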
index e7fda107f4a0ff73bc50891ea0ae101146184f46..a293cc24a90151bc4deba2398aa00e789bd1f5a7 100644 (file)
@@ -1007,6 +1007,10 @@ findpcb:
                goto drop;
 #endif
 
+       /* Radar 7377561: Avoid processing packets while closing a listen socket */
+       if (tp->t_state == TCPS_LISTEN && (so->so_options & SO_ACCEPTCONN) == 0) 
+               goto drop;
+
        if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) {
 #if TCPDEBUG
                if (so->so_options & SO_DEBUG) {
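The Radar 7377561 check above closes a race: while a listening socket is torn down, SO_ACCEPTCONN is cleared before the protocol control block goes away, so an inbound segment can still find a pcb in TCPS_LISTEN that must no longer spawn connections. The predicate in isolation, with stand-ins for the real header constants:

#define TCPS_LISTEN_X   1       /* stand-in for TCPS_LISTEN */
#define SO_ACCEPTCONN_X 0x2     /* stand-in for SO_ACCEPTCONN */

/*
 * Drop segments that arrive in the window between clearing
 * SO_ACCEPTCONN and detaching the listening pcb.
 */
static int
listen_close_race(int t_state, short so_options)
{
        return t_state == TCPS_LISTEN_X &&
            (so_options & SO_ACCEPTCONN_X) == 0;
}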
@@ -1296,7 +1300,6 @@ findpcb:
                        KERNEL_DEBUG(DBG_FNC_TCP_NEWCONN | DBG_FUNC_END,0,0,0,0,0);
                }
        }
-
 #if 1
        lck_mtx_assert(((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
 #endif
index 5fc80e33054070ba0695b7e939c8899197b7551e..a9fd82b9852afff77d2bbe4c9676889e82d186d3 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2003-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -528,14 +528,14 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
        getmicrotime(&timenow);
 
        privileged = (proc_suser(p) == 0);
-
+#if MROUTING
        switch (cmd) {
        case SIOCGETSGCNT_IN6:
        case SIOCGETMIFCNT_IN6_32:
        case SIOCGETMIFCNT_IN6_64:
                return (mrt6_ioctl(cmd, data));
        }
-
+#endif
        if (ifp == NULL)
                return (EOPNOTSUPP);
 
@@ -724,20 +724,9 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
 
        case SIOCPROTOATTACH_IN6_32:
        case SIOCPROTOATTACH_IN6_64:
-               switch (ifp->if_type) {
-#if IFT_BRIDGE /*OpenBSD 2.8*/
-       /* some of the interfaces are inherently not IPv6 capable */
-                       case IFT_BRIDGE:
-                               return;
-                               /* NOTREACHED */
-#endif
-                       default:
-                               if ((error = proto_plumb(PF_INET6, ifp)))
-                                       printf("SIOCPROTOATTACH_IN6: %s "
-                                           "error=%d\n", if_name(ifp), error);
-                               break;
-
-               }
+               if ((error = proto_plumb(PF_INET6, ifp)))
+                       printf("SIOCPROTOATTACH_IN6: %s "
+                                  "error=%d\n", if_name(ifp), error);
                return (error);
                /* NOTREACHED */
 
index dff06569fe54549ac1cc0c036a212fda850d345f..5995b212d0952bab0b0f5b32c956b6299bb77e17 100644 (file)
@@ -307,6 +307,7 @@ found:
 #if IFT_IEEE80211
        case IFT_IEEE80211:
 #endif
+       case IFT_BRIDGE:
                /* IEEE802/EUI64 cases - what others? */
                /* IEEE1394 uses 16byte length address starting with EUI64 */
                if (addrlen > 8)
index a197c6c6b6fc7be8789c31215a34a26f81c73f0f..d2621dd30700f2afc602d0fbd95174b460d8a78e 100644 (file)
@@ -312,6 +312,7 @@ struct ip6protosw inet6sw[] = {
   0,           rip_unlock,     0,
   { 0, 0 }, NULL, { 0 }
 },
+#if MROUTING
 { SOCK_RAW,     &inet6domain,  IPPROTO_PIM,    PR_ATOMIC|PR_ADDR|PR_LASTHDR,
   pim6_input,  rip6_pr_output, 0,              rip6_ctloutput,
   0,
@@ -321,6 +322,17 @@ struct ip6protosw inet6sw[] = {
   0,           rip_unlock,     0,
   { 0, 0 }, NULL, { 0 }
 },
+#else
+{ SOCK_RAW,     &inet6domain,  IPPROTO_PIM,    PR_ATOMIC|PR_ADDR|PR_LASTHDR,
+  0,           0,              0,              rip6_ctloutput,
+  0,
+  0,           0,              0,              0,
+  0,   
+  &rip6_usrreqs,
+  0,           rip_unlock,     0,
+  { 0, 0 }, NULL, { 0 }
+},
+#endif
 /* raw wildcard */
 { SOCK_RAW,    &inet6domain,   0,              PR_ATOMIC|PR_ADDR|PR_LASTHDR,
   rip6_input,  rip6_pr_output, 0,              rip6_ctloutput,
@@ -548,8 +560,10 @@ SYSCTL_INT(_net_inet6_ip6, IPV6CTL_AUTO_LINKLOCAL,
        auto_linklocal, CTLFLAG_RW, &ip6_auto_linklocal,        0, "");
 SYSCTL_STRUCT(_net_inet6_ip6, IPV6CTL_RIP6STATS, rip6stats, CTLFLAG_RD,
        &rip6stat, rip6stat, "");
+#if MROUTING
 SYSCTL_STRUCT(_net_inet6_ip6, OID_AUTO, mrt6stat, CTLFLAG_RD,
         &mrt6stat, mrt6stat, "");
+#endif
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_NEIGHBORGCTHRESH,
        neighborgcthresh, CTLFLAG_RW,   &ip6_neighborgcthresh,  0, "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXIFPREFIXES,
index 1c19434ab1fa88cc9a65e92e039a75ca37c87550..cdf3776b2b18ceca165d4f4950af1b774c2a1501 100644 (file)
@@ -637,7 +637,11 @@ ip6_input(m)
                ifnet_lock_done(ifp);
                if (in6m)
                        ours = 1;
+#if MROUTING
                else if (!ip6_mrouter) {
+#else
+               else {
+#endif
                        ip6stat.ip6s_notmember++;
                        ip6stat.ip6s_cantforward++;
                        in6_ifstat_inc(ifp, ifs6_in_discard);
@@ -902,12 +906,14 @@ ip6_input(m)
                 * ip6_mforward() returns a non-zero value, the packet
                 * must be discarded, else it may be accepted below.
                 */
+#if MROUTING
                if (ip6_mrouter && ip6_mforward(ip6, m->m_pkthdr.rcvif, m)) {
                        ip6stat.ip6s_cantforward++;
                        m_freem(m);
                        lck_mtx_unlock(ip6_mutex);
                        return;
                }
+#endif
                if (!ours) {
                        m_freem(m);
                        lck_mtx_unlock(ip6_mutex);
index 3f0735c6337aa1f77094c06f946b37a7ac1fa768..da8c4fc96caa50bf7611c8ab27fd1d4e7d47ab0c 100644 (file)
@@ -135,6 +135,9 @@ extern lck_mtx_t *ip6_mutex;
 struct socket  *ip6_mrouter  = NULL;
 int            ip6_mrouter_ver = 0;
 int            ip6_mrtproto = IPPROTO_PIM;    /* for netstat only */
+
+#if MROUTING
+
 struct mrt6stat        mrt6stat;
 
 #define NO_RTE_FOUND   0x1
@@ -1905,3 +1908,4 @@ pim6_input(mp, offp)
        rip6_input(&m, offp);
        return(IPPROTO_DONE);
 }
+#endif
index dd50d46bda9870d91067436342472802d68b5b8b..5eef448db9f36b3b93407709bfbb98c952fdea45 100644 (file)
@@ -313,6 +313,7 @@ struct rtdetq {             /* XXX: rtdetq is also defined in ip_mroute.h */
 };
 #endif /* _NETINET_IP_MROUTE_H_ */
 
+#if MROUTING
 #ifdef KERNEL_PRIVATE
 extern struct mrt6stat mrt6stat;
 
@@ -322,5 +323,6 @@ extern int ip6_mrouter_done(void);
 extern int mrt6_ioctl(u_long, caddr_t);
 #endif /* KERNEL_PRIVATE */
 #endif /* PRIVATE */
+#endif
 
 #endif /* !_NETINET6_IP6_MROUTE_H_ */
index e426933c6f8bf16f5043b531250757eef22df864..39c0d4602e8ddbf4d7c82a1c5d2a40e7024c34e4 100644 (file)
@@ -867,12 +867,14 @@ skip_ipsec2:;
                         * above, will be forwarded by the ip6_input() routine,
                         * if necessary.
                         */
+#if MROUTING
                        if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
                                if (ip6_mforward(ip6, ifp, m) != 0) {
                                        m_freem(m);
                                        goto done;
                                }
                        }
+#endif
                }
                /*
                 * Multicasts with a hoplimit of zero may be looped back,
index a895cad31cb19c54e9774c19d64d2a304e798342..c5b5bca1753daac74e4a9af33efbde6ea182e173 100644 (file)
@@ -300,8 +300,9 @@ extern int  ip6_neighborgcthresh;   /* Threshold # of NDP entries for GC */
 extern int     ip6_maxifprefixes;      /* Max acceptable prefixes via RA per IF */
 extern int     ip6_maxifdefrouters;    /* Max acceptable def routers via RA */
 extern int     ip6_maxdynroutes;       /* Max # of routes created via redirect */
-
+#ifdef MROUTING
 extern struct socket *ip6_mrouter;     /* multicast routing daemon */
+#endif
 extern int     ip6_sendredirects;      /* send IP redirects when forwarding? */
 extern int     ip6_maxfragpackets;     /* Maximum packets in reassembly queue */
 extern int      ip6_maxfrags;          /* Maximum fragments in reassembly queue */
index b65d9a5eff13e4d2e90b5839e71edb6917b3605d..6a7da3d2bb6a64999a6b5f7308d0cb7e32848996 100644 (file)
@@ -3384,6 +3384,7 @@ ipsec6_output_tunnel(
                                struct ip *ip;
                                struct sockaddr_in* dst4;
                                struct route *ro4 = NULL;
+                               struct ip_out_args ipoa = { IFSCOPE_NONE };
 
                                /*
                                 * must be last isr because encapsulated IPv6 packet
@@ -3418,14 +3419,7 @@ ipsec6_output_tunnel(
                                        dst4->sin_family = AF_INET;
                                        dst4->sin_len = sizeof(*dst4);
                                        dst4->sin_addr = ip->ip_dst;
-                                       rtalloc(ro4);
                                }
-                               if (ro4->ro_rt == NULL) {
-                                       OSAddAtomic(1, &ipstat.ips_noroute);
-                                       error = EHOSTUNREACH;
-                                       goto bad;
-                               }
-       
                                state->m = ipsec4_splithdr(state->m);
                                if (!state->m) {
                                        error = ENOMEM;
@@ -3474,8 +3468,10 @@ ipsec6_output_tunnel(
                                }
                                ip = mtod(state->m, struct ip *);
                                ip->ip_len = ntohs(ip->ip_len);  /* flip len field before calling ip_output */
-                               ip_output(state->m, NULL, ro4, 0, NULL, NULL);
+                               error = ip_output(state->m, NULL, ro4, IP_OUTARGS, NULL, &ipoa);
                                state->m = NULL;
+                               if (error != 0)
+                                       goto bad;
                                goto done;
                        } else {
                                ipseclog((LOG_ERR, "ipsec6_output_tunnel: "
@@ -4132,6 +4128,7 @@ ipsec_send_natt_keepalive(
        struct udphdr *uh;
        struct ip *ip;
        int error;
+       struct ip_out_args ipoa = { IFSCOPE_NONE };
 
        lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED);
        
@@ -4172,7 +4169,7 @@ ipsec_send_natt_keepalive(
        uh->uh_sum = 0;
        *(u_int8_t*)((char*)m_mtod(m) + sizeof(struct ip) + sizeof(struct udphdr)) = 0xFF;
        
-       error = ip_output(m, NULL, &sav->sah->sa_route, IP_NOIPSEC, NULL, NULL);
+       error = ip_output(m, NULL, &sav->sah->sa_route, IP_OUTARGS | IP_NOIPSEC, NULL, &ipoa);
        if (error == 0) {
                sav->natt_last_activity = natt_now;
                return TRUE;
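Both ipsec changes above converge on the same output convention: initialize a struct ip_out_args to IFSCOPE_NONE, pass IP_OUTARGS, drop the manual rtalloc() (ip_output() performs its own route lookup), and propagate the return value instead of discarding it. A kernel-context sketch of the call shape; it assumes the kernel headers that declare ip_output() and struct ip_out_args:

/*
 * Kernel-context sketch only (not standalone): the signature matches
 * the calls above -- ip_output(m, opts, route, flags, moptions, ipoa).
 */
static int
send_with_outargs(struct mbuf *m, struct route *ro)
{
        struct ip_out_args ipoa = { IFSCOPE_NONE }; /* no ifscope pinning */
        int error;

        error = ip_output(m, NULL, ro, IP_OUTARGS, NULL, &ipoa);
        return (error);         /* was silently ignored before */
}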
index 36e09c8b8264e3b7e6d5e2bbebd9a22e1dc4e0f8..7e9a882e87e1f7341126fdb480b5413b9340e706 100644 (file)
@@ -499,7 +499,11 @@ mld6_sendpkt(
         * Request loopback of the report if we are acting as a multicast
         * router, so that the process-level routing daemon can hear it.
         */
+#if MROUTING
        im6o.im6o_multicast_loop = (ip6_mrouter != NULL);
+#else
+       im6o.im6o_multicast_loop = 0;
+#endif
 
        /* increment output statistics */
        icmp6stat.icp6s_outhist[type]++;
index 5f2c2abcde8df5ea07e0e79195650df8e742c671..a7b5cb3a3dc5284b69f22c87aa69449e08ec9e2f 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2008-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -1309,7 +1309,7 @@ nd6_free(
                dr = defrouter_lookup(&((struct sockaddr_in6 *)rt_key(rt))->
                    sin6_addr, rt->rt_ifp);
 
-               if (ln->ln_router || dr) {
+               if ((ln && ln->ln_router) || dr) {
                        /*
                         * rt6_flush must be called whether or not the neighbor
                         * is in the Default Router List.
@@ -2906,6 +2906,7 @@ nd6_need_cache(
 #if IFT_IEEE80211
        case IFT_IEEE80211:
 #endif
+       case IFT_BRIDGE:
        case IFT_GIF:           /* XXX need more cases? */
                return(1);
        default:
@@ -2933,6 +2934,7 @@ nd6_storelladdr(
 #if IFT_IEEE80211
                case IFT_IEEE80211:
 #endif
+               case IFT_BRIDGE:
                        ETHER_MAP_IPV6_MULTICAST(&SIN6(dst)->sin6_addr,
                                                 desten);
                        return(1);
index 762258e4554d67e4d1ec39f5a64660f39820b12a..d4a32b9274a8d05acd585ec4e7556339de127741 100644 (file)
@@ -566,8 +566,10 @@ rip6_ctloutput(
                case MRT6_ADD_MFC:
                case MRT6_DEL_MFC:
                case MRT6_PIM:
+#if MROUTING
                        error = ip6_mrouter_get(so, sopt);
                        break;
+#endif
                default:
                        error = ip6_ctloutput(so, sopt);
                        break;
@@ -597,8 +599,10 @@ rip6_ctloutput(
                case MRT6_ADD_MFC:
                case MRT6_DEL_MFC:
                case MRT6_PIM:
+#if MROUTING
                        error = ip6_mrouter_set(so, sopt);
                        break;
+#endif
                default:
                        error = ip6_ctloutput(so, sopt);
                        break;
@@ -649,8 +653,10 @@ rip6_detach(struct socket *so)
        if (inp == 0)
                panic("rip6_detach");
        /* xxx: RSVP */
+#if MROUTING
        if (so == ip6_mrouter)
                ip6_mrouter_done();
+#endif
        if (inp->in6p_icmp6filt) {
                FREE(inp->in6p_icmp6filt, M_PCB);
                inp->in6p_icmp6filt = NULL;
index e630eb95c4d092b2bd2aff938341d7c8f69ba79f..ee1a6167988c030c9415dd721746cc413182e3c2 100644 (file)
@@ -3952,10 +3952,10 @@ nfsrv_rcv_locked(socket_t so, struct nfsrv_sock *slp, int waitflag)
        if (slp->ns_sotype == SOCK_STREAM) {
                /*
                 * If there are already records on the queue, defer soreceive()
-                * to an nfsd so that there is feedback to the TCP layer that
+                * to an(other) nfsd so that there is feedback to the TCP layer that
                 * the nfs servers are heavily loaded.
                 */
-               if (slp->ns_rec && waitflag == MBUF_DONTWAIT) {
+               if (slp->ns_rec) {
                        ns_flag = SLP_NEEDQ;
                        goto dorecs;
                }
index a11222c07fcadb1c6975075c29b1e7dfeb71219e..6d3801a50f4e835dfb8a7ecb28510e1fd3114996 100644 (file)
@@ -147,6 +147,7 @@ struct buf {
  * Parameters for buffer cache garbage collection 
  */
 #define BUF_STALE_THRESHHOLD   30      /* Collect if untouched in the last 30 seconds */
+#define BUF_MAX_GC_COUNT       1000    /* Generally 6-8 MB */
 
 /*
  * mask used by buf_flags... these are the readable external flags
index 3abe336ccb2afda2c0a3a471a3027fbc2e54bbef..92f687f3ee13ee10836ac55089c06aad89fb98b0 100644 (file)
@@ -66,6 +66,10 @@ enum {
 typedef struct jetsam_priority_entry {
        pid_t pid;
        uint32_t flags;
+       int32_t hiwat_pages;
+       int32_t hiwat_reserved1;
+       int32_t hiwat_reserved2;
+       int32_t hiwat_reserved3;
 } jetsam_priority_entry_t;
 
 /*
index d5a3d32721056ea669a666277a203f7d12053955..57740c51fd926cd41be7f57d7f1acdf2e08e0d0f 100644 (file)
@@ -292,6 +292,9 @@ struct vfs_attr {
  * NFS export related mount flags.
  */
 #define        MNT_EXPORTED    0x00000100      /* file system is exported */
+#ifdef PRIVATE
+#define MNT_IMGSRC     0x00000200
+#endif /* PRIVATE */
 
 /*
  * MAC labeled / "quarantined" flag
index 6cca245d501206c7975e852fe3db167ac48fc82b..cb71406df546842f4b095aae08030dc3eb887132 100644 (file)
@@ -227,6 +227,10 @@ extern struct mount * dead_mountp;
  *             because the bits here were broken out from the high bits
  *             of the mount flags.
  */
+#ifdef CONFIG_IMGSRC_ACCESS
+#define MNTK_HAS_MOVED         0x00002000
+#define MNTK_BACKS_ROOT                0x00004000
+#endif /* CONFIG_IMGSRC_ACCESS */
 #define MNTK_AUTH_CACHE_TTL    0x00008000      /* rights cache has TTL - TTL of 0 disables cache */
 #define        MNTK_PATH_FROM_ID       0x00010000      /* mounted file system supports id-to-path lookups */
 #define        MNTK_UNMOUNT_PREFLIGHT  0x00020000      /* mounted file system wants preflight check during unmount */
index dc68c04c2cffcd22f85f271fff8ab2e1acc393e9..7d0cfae29b980b97ae3066070070eb1716d8a456 100644 (file)
@@ -42,7 +42,6 @@ struct threadlist {
        TAILQ_ENTRY(threadlist) th_entry;
        thread_t th_thread;
        int      th_flags;
-       uint32_t th_suspended;
        uint16_t th_affinity_tag;
        uint8_t  th_priority;
        uint8_t  th_policy;
@@ -57,6 +56,7 @@ struct threadlist {
 #define TH_LIST_BLOCKED        0x04
 #define TH_LIST_SUSPENDED      0x08
 #define TH_LIST_BUSY           0x10
+#define TH_LIST_NEED_WAKEUP    0x20
 
 struct workitem {
        TAILQ_ENTRY(workitem) wi_entry;
index 85829a914813a63af2e7aee22c826f5275da4ced..72c969c125a6bb86125ddfc739f27ba4415781a8 100644 (file)
@@ -106,6 +106,7 @@ typedef __uint64_t  rlim_t;
 
 #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
 #define        PRIO_DARWIN_THREAD      3               /* Second argument is always 0 (current thread) */
+#define        PRIO_DARWIN_PROCESS     4               /* Second argument is a PID */
 
 /*
  * Range limitations for the value of the third parameter to setpriority().
@@ -113,7 +114,8 @@ typedef __uint64_t  rlim_t;
 #define        PRIO_MIN        -20
 #define        PRIO_MAX        20
 
-/* use PRIO_DARWIN_BG to set the current thread into "background" state
+/* 
+ * use PRIO_DARWIN_BG to set the current thread into "background" state
  * which lowers CPU, disk IO, and networking priorities until thread terminates
  * or "background" state is revoked
  */
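PRIO_DARWIN_PROCESS extends PRIO_DARWIN_THREAD from "the current thread" to an arbitrary PID. A hedged userland sketch, assuming the existing PRIO_DARWIN_BG value is the matching third argument, as it is for threads:

#include <sys/resource.h>
#include <unistd.h>

/*
 * Push another process into the throttled "background" band; pass 0
 * instead of PRIO_DARWIN_BG to lift it back out (assumed symmetric
 * with the documented PRIO_DARWIN_THREAD behavior).
 */
static int
set_process_background(pid_t pid)
{
        return setpriority(PRIO_DARWIN_PROCESS, pid, PRIO_DARWIN_BG);
}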
index 2bd0c593efb1624cf3413f6879e631b6d75f88b1..830dc76ae7f8ac938603026df73c396da6214953 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -120,7 +120,7 @@ struct socket {
        short   so_options;             /* from socket call, see socket.h */
        short   so_linger;              /* time to linger while closing */
        short   so_state;               /* internal state flags SS_*, below */
-       caddr_t so_pcb;                 /* protocol control block */
+       void    *so_pcb;                        /* protocol control block */
        struct  protosw *so_proto;      /* protocol handle */
        /*
         * Variables for connection queueing.
index 4a7700e0bb9ba8c798f2e9ba4591d5d2cf9c1488..ab6ca6658c9c922df6fa22e865fb9b395ecc45f3 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #define SIOCGIFBOND    _IOWR('i', 71, struct ifreq)    /* get bond if config */
 #define        SIOCIFCREATE    _IOWR('i', 120, struct ifreq)   /* create clone if */
 #define        SIOCIFDESTROY    _IOW('i', 121, struct ifreq)   /* destroy clone if */
+
+#define SIOCSDRVSPEC    _IOW('i', 123, struct ifdrv)    /* set driver-specific
+                                                                  parameters */
+#define SIOCGDRVSPEC    _IOWR('i', 123, struct ifdrv)   /* get driver-specific
+                                                                  parameters */
+#ifdef KERNEL_PRIVATE
+#define SIOCSDRVSPEC32    _IOW('i', 123, struct ifdrv32)    /* set driver-specific
+                                                                  parameters */
+#define SIOCGDRVSPEC32    _IOWR('i', 123, struct ifdrv32)   /* get driver-specific
+                                                                  parameters */
+#define SIOCSDRVSPEC64    _IOW('i', 123, struct ifdrv64)    /* set driver-specific
+                                                                  parameters */
+#define SIOCGDRVSPEC64    _IOWR('i', 123, struct ifdrv64)   /* get driver-specific
+                                                                  parameters */
+
+#endif /* KERNEL_PRIVATE */
 #define        SIOCSIFVLAN      _IOW('i', 126, struct ifreq)   /* set VLAN config */
 #define        SIOCGIFVLAN     _IOWR('i', 127, struct ifreq)   /* get VLAN config */
 #define        SIOCSETVLAN     SIOCSIFVLAN
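SIOCSDRVSPEC/SIOCGDRVSPEC carry opaque, driver-defined requests in a struct ifdrv; the 32/64-bit kernel-private variants exist so the kernel can translate the embedded pointer for differently sized user processes. A hedged userland sketch, assuming the BSD-conventional ifdrv fields (ifd_name, ifd_cmd, ifd_len, ifd_data):

#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <net/if.h>
#include <string.h>

/*
 * Issue a driver-specific "get" -- this is how the new if_bridge is
 * expected to be queried from user space.
 */
static int
get_drvspec(int s, const char *ifname, unsigned long subcmd,
    void *buf, size_t len)
{
        struct ifdrv ifd;

        memset(&ifd, 0, sizeof (ifd));
        strlcpy(ifd.ifd_name, ifname, sizeof (ifd.ifd_name));
        ifd.ifd_cmd = subcmd;   /* meaning is private to the driver */
        ifd.ifd_len = len;
        ifd.ifd_data = buf;
        return ioctl(s, SIOCGDRVSPEC, &ifd);
}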
index 7c73b04ee119d01035404d76137263c42fb565dd..775a8457b775c1687f451cdf7a4611ff8262881a 100644 (file)
 
 extern struct zone     *ubc_info_zone;
 
+/* 
+ * Maximum number of vfs clusters per vnode
+ */
+#define MAX_CLUSTERS   CONFIG_MAX_CLUSTERS
 
-#define MAX_CLUSTERS 8         /* maximum number of vfs clusters per vnode */
 #define SPARSE_PUSH_LIMIT 4    /* limit on number of concurrent sparse pushes outside of the cl_lockw */
                                 /* once we reach this limit, we'll hold the lock */
 
index cf8f7b455b313cd30eb7a53e0d566e652fa42284..dbff3a50dd187407d363acdbd27617052555f9a7 100644 (file)
@@ -250,6 +250,10 @@ struct vnode {
  */
 extern struct vnode *rootvnode;        /* root (i.e. "/") vnode */
 
+#ifdef CONFIG_IMGSRC_ACCESS
+extern struct vnode *imgsrc_rootvnode;
+#endif /* CONFIG_IMGSRC_ACCESS */
+
 
 /*
  * Mods for exensibility.
index 3f4c4e59372c04ba10b0ed279e438d446f2dcfd2..6d3eba5eb44a45762078f514c6ae6e7af809349a 100644 (file)
@@ -125,7 +125,7 @@ static void buf_reassign(buf_t bp, vnode_t newvp);
 static errno_t buf_acquire_locked(buf_t bp, int flags, int slpflag, int slptimeo);
 static int     buf_iterprepare(vnode_t vp, struct buflists *, int flags);
 static void    buf_itercomplete(vnode_t vp, struct buflists *, int flags);
-static boolean_t buffer_cache_gc(void);
+boolean_t buffer_cache_gc(void);
 
 __private_extern__ int  bdwrite_internal(buf_t, int);
 
@@ -3648,12 +3648,13 @@ dump_buffer:
        return(0);
 }
 
-static boolean_t 
+boolean_t 
 buffer_cache_gc(void)
 {
        buf_t bp;
        boolean_t did_large_zfree = FALSE;
        int now = buf_timestamp();
+       uint32_t count = 0;
 
        lck_mtx_lock_spin(buf_mtxp);
 
@@ -3661,7 +3662,7 @@ buffer_cache_gc(void)
        bp = TAILQ_FIRST(&bufqueues[BQ_META]);
 
        /* Only collect buffers unused in the last N seconds. Note: ordered by timestamp. */
-       while ((bp != NULL) && ((now - bp->b_timestamp) > BUF_STALE_THRESHHOLD)) {
+       while ((bp != NULL) && ((now - bp->b_timestamp) > BUF_STALE_THRESHHOLD) && (count < BUF_MAX_GC_COUNT)) {
                int result, size;
                boolean_t is_zalloc;
 
@@ -3674,6 +3675,7 @@ buffer_cache_gc(void)
                        did_large_zfree = TRUE;
                }
                bp = TAILQ_FIRST(&bufqueues[BQ_META]);
+               count++;
        } 
 
        lck_mtx_unlock(buf_mtxp);
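BUF_MAX_GC_COUNT, added to buf_internal.h above, bounds how much work (and thus how long buf_mtxp is held) a single buffer_cache_gc() pass can do. The loop's termination logic, reduced to a standalone sketch:

#include <stdint.h>

#define BUF_STALE_THRESHHOLD    30      /* seconds, as in buf_internal.h */
#define BUF_MAX_GC_COUNT        1000    /* per-pass cap, "generally 6-8 MB" */

/*
 * Stop at the first non-stale buffer (the queue is timestamp-ordered)
 * or once the per-pass budget is spent, whichever comes first.
 */
static uint32_t
gc_pass(int now, const int *timestamps, uint32_t nbufs)
{
        uint32_t count = 0;

        while (count < nbufs &&
            (now - timestamps[count]) > BUF_STALE_THRESHHOLD &&
            count < BUF_MAX_GC_COUNT)
                count++;        /* one reclaimed buffer per iteration */

        return count;
}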
index 5aec1498a48a26d7480d7a8ec7c7176177659475..d436d781baa3ce22544f7b859ac49bfdf5d63b46 100644 (file)
@@ -2718,7 +2718,7 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old
                         * because IO_HEADZEROFILL and IO_TAILZEROFILL not set
                         */
                        if ((start_offset + total_size) > max_io_size)
-                               total_size -= start_offset;
+                               total_size = max_io_size - start_offset;
                        xfer_resid = total_size;
 
                        retval = cluster_copy_ubc_data_internal(vp, uio, &xfer_resid, 1, 1);
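The one-line cluster_write_copy() change above fixes an under-clamp. With hypothetical numbers: max_io_size = 1024 KB, start_offset = 16 KB, total_size = 1500 KB. The old total_size -= start_offset leaves 1484 KB, still past the cap; the new form leaves 1008 KB, so the transfer ends exactly at max_io_size:

#include <stdint.h>

/*
 * Correct clamp: afterwards start_offset + total_size <= max_io_size.
 * (The old subtraction only shrank total_size by start_offset, which
 * is unrelated to how far past the cap the request actually was.)
 */
static uint32_t
clamp_total_size(uint32_t start_offset, uint32_t total_size,
    uint32_t max_io_size)
{
        if (start_offset + total_size > max_io_size)
                total_size = max_io_size - start_offset;
        return total_size;
}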
@@ -5614,6 +5614,14 @@ is_file_clean(vnode_t vp, off_t filesize)
 #define DRT_HASH_SMALL_MODULUS 23
 #define DRT_HASH_LARGE_MODULUS 401
 
+/*
+ * Physical memory required before the large hash modulus is permitted.
+ *
+ * On small memory systems, the large hash modulus can lead to physical
+ * memory starvation, so we avoid using it there.
+ */
+#define DRT_HASH_LARGE_MEMORY_REQUIRED (1024LL * 1024LL * 1024LL)      /* 1GiB */
+
 #define DRT_SMALL_ALLOCATION   1024    /* 104 bytes spare */
 #define DRT_LARGE_ALLOCATION   16384   /* 344 bytes spare */
 
@@ -5756,8 +5764,12 @@ vfs_drt_alloc_map(struct vfs_drt_clustermap **cmapp)
                 * see whether we should grow to the large one.
                 */
                if (ocmap->scm_modulus == DRT_HASH_SMALL_MODULUS) {
-                       /* if the ring is nearly full */
-                       if (active_buckets > (DRT_HASH_SMALL_MODULUS - 5)) {
+                       /* 
+                        * If the ring is nearly full and we are allowed to
+                        * use the large modulus, upgrade.
+                        */
+                       if ((active_buckets > (DRT_HASH_SMALL_MODULUS - 5)) &&
+                           (max_mem >= DRT_HASH_LARGE_MEMORY_REQUIRED)) {
                                nsize = DRT_HASH_LARGE_MODULUS;
                        } else {
                                nsize = DRT_HASH_SMALL_MODULUS;
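The dirty-region-tracking change above gates the larger hash on physical memory: growing to DRT_HASH_LARGE_MODULUS costs a 16 KB allocation per map (per DRT_LARGE_ALLOCATION above), which the new comment notes can starve small-memory systems. The decision in isolation (max_mem is xnu's global physical-memory size; a parameter stands in here):

#include <stdint.h>

#define DRT_HASH_SMALL_MODULUS  23
#define DRT_HASH_LARGE_MODULUS  401
#define DRT_HASH_LARGE_MEMORY_REQUIRED  (1024LL * 1024LL * 1024LL) /* 1 GiB */

/*
 * Upgrade only when the ring is nearly full AND the machine is big
 * enough that the larger allocation will not hurt.
 */
static int
drt_pick_modulus(int active_buckets, int64_t phys_mem)
{
        if (active_buckets > (DRT_HASH_SMALL_MODULUS - 5) &&
            phys_mem >= DRT_HASH_LARGE_MEMORY_REQUIRED)
                return (DRT_HASH_LARGE_MODULUS);
        return (DRT_HASH_SMALL_MODULUS);
}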
index 467eb00b2a0d4efb9bf413ed33745a7bd651c846..529129d9c6b74a63cac02f08198f2ed1447f6a6e 100644 (file)
  */
 struct mount *rootfs;
 struct vnode *rootvnode;
+
+#ifdef CONFIG_IMGSRC_ACCESS
+struct vnode *imgsrc_rootvnode;
+#endif /* CONFIG_IMGSRC_ACCESS */
+
 int (*mountroot)(void) = NULL;
 
 /*
index c44732bb830ca2d977b02ee34d73992e958b2550..d78894caf7059d29a2b8e1661fcc68c4754e0b28 100644 (file)
@@ -6901,8 +6901,6 @@ errno_t rmdir_remove_orphaned_appleDouble(vnode_t vp , vfs_context_t ctx, int *
        } while (!eofflag);
        /*
         * If we've made it here all the files in the dir are ._ files.
-        * As we iterate through to delete them, we will verify that
-        * they are true AppleDouble files.
         * We can delete the files even though the node is suspended
         * because we are the owner of the file.
         */
@@ -6943,61 +6941,12 @@ errno_t rmdir_remove_orphaned_appleDouble(vnode_t vp , vfs_context_t ctx, int *
                                            (dp->d_namlen == 2 && dp->d_name[0] == '.' && dp->d_name[1] == '.'))
                                          ) {
 
-                               /*
-                                * This is a ._ file, so verify it is actually an AppleDouble
-                                * file by checking the header before we remove it.
-                                */
-                               vnode_t xvp = NULL;
-                               int did_namei = 0;
-
-                               NDINIT(&nd_temp, DELETE, USEDVP | LOCKPARENT,
-                                      UIO_SYSSPACE, CAST_USER_ADDR_T(dp->d_name), ctx);
+                               NDINIT(&nd_temp, DELETE, USEDVP, UIO_SYSSPACE, CAST_USER_ADDR_T(dp->d_name), ctx);
                                nd_temp.ni_dvp = vp;
-                               error = namei(&nd_temp);
-
-                               if (error) {
-                                       if (error == ENOENT) {
-                                               error = 0;
-                                       } else {
-                                               error = ENOTEMPTY;
-                                       }
-                                       goto out1;
-                               }
-                               did_namei = 1;
-
-                               xvp = nd_temp.ni_vp;
-
-                               error = check_appledouble_header(xvp, ctx);
-                               if (error) {
-                                       error = ENOTEMPTY;
-                                       goto out1;
-                               }
-                               
-                               /* Remove the file. */
-                               error = VNOP_REMOVE(vp, xvp, &nd_temp.ni_cnd, 0, ctx);
-                               if (error) {
-                                       if (error == ENOENT) {
-                                               error = 0;
-                                       }
-                                       goto out1;
-                               }
-
-out1:
-                               /* drop extra reference on vp from LOCKPARENT namei */
-                               vnode_put (vp);
-
-                               if (did_namei) {
-                                       nameidone(&nd_temp);
-                                       did_namei = 0;
-                               }
-                               if (xvp) {
-                                       vnode_put(xvp);
-                                       xvp = NULL;
-                               }
-                               if (error) {
+                               error = unlink1(ctx, &nd_temp, 0);
+                               if (error && error != ENOENT) {
                                        goto outsc;
                                }
-
                        }
                        cpos += dp->d_reclen;
                        dp = (struct dirent*)cpos;
index 23653799fbcec651a7849d6e7822fe97b0b0914e..24dfd95b3c0b799735d5d89954ede199b83dd214 100644 (file)
@@ -153,6 +153,17 @@ static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
 static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp,
                        user_addr_t bufp);
 static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
+
+#ifdef CONFIG_IMGSRC_ACCESS
+static int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname);
+static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx);
+static int place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx);
+static void undo_place_on_covered_vp(mount_t mp, vnode_t vp);
+static int mount_begin_update(mount_t mp, vfs_context_t ctx, int flags);
+static void mount_end_update(mount_t mp);
+static int relocate_imageboot_source(vnode_t vp, struct componentname *cnp, const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs);
+#endif /* CONFIG_IMGSRC_ACCESS */
+
 int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
 
 __private_extern__
@@ -297,6 +308,15 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int3
        if (error)
                goto out1;
        
+#ifdef CONFIG_IMGSRC_ACCESS
+       if (uap->flags == MNT_IMGSRC) {
+               error = relocate_imageboot_source(vp, &nd.ni_cnd, fstypename, ctx, is_64bit, fsmountargs);
+               vnode_put(pvp);
+               vnode_put(vp);
+               return error;
+       }
+#endif /* CONFIG_IMGSRC_ACCESS */
+
        if (uap->flags & MNT_UPDATE) {
                if ((vp->v_flag & VROOT) == 0) {
                        error = EINVAL;
@@ -323,6 +343,17 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int3
                        error = ENOTSUP;
                        goto out1;
                }
+
+#ifdef CONFIG_IMGSRC_ACCESS 
+               /* Can't downgrade the backer of the root FS */
+               if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
+                       (!vfs_isrdonly(mp)) && (uap->flags & MNT_RDONLY))
+               {
+                       error = ENOTSUP;
+                       goto out1;
+               }
+#endif /* CONFIG_IMGSRC_ACCESS */
+
                /*
                 * Only root, or the user that did the original mount is
                 * permitted to update it.
@@ -867,6 +898,368 @@ out1:
        return(error);
 }
 
+#ifdef CONFIG_IMGSRC_ACCESS
+/* 
+ * Flush in-core data, check for competing mount attempts,
+ * and set VMOUNT
+ */
+static int
+prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname)
+{
+       struct vnode_attr va;
+       int error;
+
+       /*
+        * If the user is not root, ensure that they own the directory
+        * onto which we are attempting to mount.
+        */
+       VATTR_INIT(&va);
+       VATTR_WANTED(&va, va_uid);
+       if ((error = vnode_getattr(vp, &va, ctx)) ||
+           (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
+            (!vfs_context_issuser(ctx)))) { 
+               error = EPERM;
+               goto out;
+       }
+
+       if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
+               goto out;
+
+       if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
+               goto out;
+
+       if (vp->v_type != VDIR) {
+               error = ENOTDIR;
+               goto out;
+       }
+
+       if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
+               error = EBUSY;
+               goto out;
+       }
+
+#if CONFIG_MACF
+       error = mac_mount_check_mount(ctx, vp,
+           cnp, fsname);
+       if (error != 0)
+               goto out;
+#endif
+
+       vnode_lock_spin(vp);
+       SET(vp->v_flag, VMOUNT);
+       vnode_unlock(vp);
+
+out:
+       return error;
+}
+
+static int
+authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx)
+{
+       struct nameidata nd;
+       vnode_t vp;
+       mode_t accessmode;
+       int error;
+
+       NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
+       if ( (error = namei(&nd)) )
+               return error;
+
+       strncpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
+       vp = nd.ni_vp;
+       nameidone(&nd);
+
+       if (vp->v_type != VBLK) {
+               error = ENOTBLK;
+               goto out;
+       }
+       if (major(vp->v_rdev) >= nblkdev) {
+               error = ENXIO;
+               goto out;
+       }
+       /*
+        * If mount by non-root, then verify that user has necessary
+        * permissions on the device.
+        */
+       if (!vfs_context_issuser(ctx)) {
+               accessmode = KAUTH_VNODE_READ_DATA;
+               if ((mp->mnt_flag & MNT_RDONLY) == 0)
+                       accessmode |= KAUTH_VNODE_WRITE_DATA;
+               if ((error = vnode_authorize(vp, NULL, accessmode, ctx)) != 0)
+                       goto out;
+       }
+
+       *devvpp = vp;
+out:
+       if (error) {
+               vnode_put(vp);
+       }
+
+       return error;
+}
+
+/*
+ * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
+ * and call checkdirs()
+ */
+static int
+place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx)
+{
+       int error;
+
+       mp->mnt_vnodecovered = vp; /* XXX This is normally only set at init-time ... */
+
+       vnode_lock_spin(vp);
+       CLR(vp->v_flag, VMOUNT);
+       vp->v_mountedhere = mp;
+       vnode_unlock(vp);
+
+       /*
+        * Taking the name_cache_lock exclusively will
+        * ensure that everyone is out of the fast path who
+        * might be trying to use a now-stale copy of
+        * vp->v_mountedhere->mnt_realrootvp.
+        * Bumping mount_generation causes the cached values
+        * to be invalidated.
+        */
+       name_cache_lock();
+       mount_generation++;
+       name_cache_unlock();
+
+       error = vnode_ref(vp);
+       if (error != 0) {
+               goto out;
+       }
+
+       error = checkdirs(vp, ctx);
+       if (error != 0)  {
+               /* Unmount the filesystem as cdir/rdirs cannot be updated */
+               vnode_rele(vp);
+               goto out;
+       }
+
+out:
+       if (error != 0) {
+               mp->mnt_vnodecovered = NULLVP;
+       }
+       return error;
+}
+
+static void
+undo_place_on_covered_vp(mount_t mp, vnode_t vp)
+{
+       vnode_rele(vp);
+       vnode_lock_spin(vp);
+       vp->v_mountedhere = (mount_t)NULL;
+       vnode_unlock(vp);
+
+       mp->mnt_vnodecovered = NULLVP;
+}
+
+static int
+mount_begin_update(mount_t mp, vfs_context_t ctx, int flags)
+{
+       int error;
+
+       /* If an unmount is in progress, return an error */
+       mount_lock_spin(mp);
+       if (mp->mnt_lflag & MNT_LUNMOUNT) {
+               mount_unlock(mp);
+               return EBUSY;
+       }
+       mount_unlock(mp);
+       lck_rw_lock_exclusive(&mp->mnt_rwlock);
+
+       /*
+        * We only allow the filesystem to be reloaded if it
+        * is currently mounted read-only.
+        */
+       if ((flags & MNT_RELOAD) &&
+                       ((mp->mnt_flag & MNT_RDONLY) == 0)) {
+               error = ENOTSUP;
+               goto out;
+       }
+
+       /*
+        * Only root, or the user that did the original mount is
+        * permitted to update it.
+        */
+       if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
+                       (!vfs_context_issuser(ctx))) { 
+               error = EPERM;
+               goto out;
+       }
+#if CONFIG_MACF
+       error = mac_mount_check_remount(ctx, mp);
+       if (error != 0) {
+               goto out;
+       }
+#endif
+
+out:
+       if (error) {
+               lck_rw_done(&mp->mnt_rwlock);
+       }
+
+       return error;
+}
+
+static void 
+mount_end_update(mount_t mp)
+{
+       lck_rw_done(&mp->mnt_rwlock);
+}
+
+static int
+relocate_imageboot_source(vnode_t vp, struct componentname *cnp, 
+               const char *fsname, vfs_context_t ctx, 
+               boolean_t is64bit, user_addr_t fsmountargs)
+{
+       int error;
+       mount_t mp;
+       boolean_t placed = FALSE;
+       vnode_t devvp;
+       struct vfstable *vfsp;
+       user_addr_t devpath;
+       char *old_mntonname;
+
+       /* If we didn't imageboot, nothing to move */
+       if (imgsrc_rootvnode == NULLVP) {
+               return EINVAL;
+       }
+
+       /* Only root can do this */
+       if (!vfs_context_issuser(ctx)) {
+               return EPERM;
+       }
+
+       error = vnode_get(imgsrc_rootvnode);
+       if (error != 0) {
+               return error;
+       }
+
+       MALLOC(old_mntonname, char*, MAXPATHLEN, M_TEMP, M_WAITOK);
+
+       /* Can only move once */
+       mp = vnode_mount(imgsrc_rootvnode);
+       if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
+               error = EBUSY;
+               goto out0;
+       }
+
+       /* Get exclusive rwlock on mount, authorize update on mp */
+       error = mount_begin_update(mp, ctx, 0);
+       if (error != 0) {
+               goto out0;
+       }
+
+       /* 
+        * It can only be moved once.  Flag is set under the rwlock,
+        * so we're now safe to proceed.
+        */
+       if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
+               goto out1;
+       }
+
+       /* Mark covered vnode as mount in progress, authorize placing mount on top */
+       error = prepare_coveredvp(vp, ctx, cnp, fsname);
+       if (error != 0) {
+               goto out1;
+       }
+       
+       /* Sanity check the name caller has provided */
+       vfsp = mp->mnt_vtable;
+       if (strncmp(vfsp->vfc_name, fsname, MFSNAMELEN) != 0) {
+               error = EINVAL;
+               goto out2;
+       }
+
+       /* Check the device vnode and update mount-from name, for local filesystems */
+       if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
+               if (is64bit) {
+                       if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
+                               goto out2;      
+                       fsmountargs += sizeof(devpath);
+               } else {
+                       user32_addr_t tmp;
+                       if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
+                               goto out2;      
+                       /* munge into LP64 addr */
+                       devpath = CAST_USER_ADDR_T(tmp);
+                       fsmountargs += sizeof(tmp);
+               }
+
+               if (devpath != USER_ADDR_NULL) {
+                       error = authorize_devpath_and_update_mntfromname(mp, devpath, &devvp, ctx);
+                       if (error) {
+                               goto out2;
+                       }
+
+                       vnode_put(devvp);
+               }
+       }
+
+       /*
+        * Place mp on top of vnode, ref the vnode, call checkdirs(),
+        * and increment the name cache's mount generation.
+        */
+       error = place_mount_and_checkdirs(mp, vp, ctx);
+       if (error != 0) {
+               goto out2;
+       }
+
+       placed = TRUE;
+
+       strncpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
+       strncpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
+
+       /* Forbid future moves */
+       mount_lock(mp);
+       mp->mnt_kern_flag |= MNTK_HAS_MOVED;
+       mount_unlock(mp);
+
+       /* Finally, add to mount list, completely ready to go */
+       error = mount_list_add(mp);
+       if (error != 0) {
+               goto out3;
+       }
+
+       mount_end_update(mp);
+       vnode_put(imgsrc_rootvnode);
+       FREE(old_mntonname, M_TEMP);
+
+       return 0;
+out3:
+       strncpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);
+
+       mount_lock(mp);
+       mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED);
+       mount_unlock(mp);
+
+out2:
+       /* 
+        * Placing the mp on the vnode clears VMOUNT,
+        * so cleanup is different after that point 
+        */
+       if (placed) {
+               /* Rele the vp, clear VMOUNT and v_mountedhere */
+               undo_place_on_covered_vp(mp, vp);
+       } else {
+               vnode_lock_spin(vp);
+               CLR(vp->v_flag, VMOUNT);
+               vnode_unlock(vp);
+       }
+out1:
+       mount_end_update(mp);
+
+out0:
+       vnode_put(imgsrc_rootvnode);
+       FREE(old_mntonname, M_TEMP);
+       return error;
+}
+
+#endif /* CONFIG_IMGSRC_ACCESS */
+
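As a usage sketch only: the snippet below shows how a privileged process might drive this relocation path. The mount point, filesystem name, and device node are assumptions, not taken from this commit, and it is likewise assumed that MNT_IMGSRC is visible to the including code; what is grounded above is the MNT_IMGSRC dispatch in __mac_mount and the copyin of a device-path pointer for VFC_VFSLOCALARGS filesystems.

    #include <sys/mount.h>
    #include <stdio.h>

    int main(void)
    {
        /* For local filesystems, relocate_imageboot_source() copies a
         * device path pointer out of the mount args; hand it one. */
        char *devpath = "/dev/disk0s3";          /* hypothetical device node */

        /* __mac_mount compares uap->flags with ==, so MNT_IMGSRC must be
         * passed alone, not OR'd with other MNT_* flags. */
        if (mount("hfs", "/mnt/imgsrc", MNT_IMGSRC, &devpath) != 0) {
            perror("mount(MNT_IMGSRC)");         /* EPERM unless root, etc. */
            return 1;
        }
        return 0;
    }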
 void
 enablequotas(struct mount *mp, vfs_context_t ctx)
 {
@@ -1086,6 +1479,13 @@ safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
                goto out;
        }
 
+#ifdef CONFIG_IMGSRC_ACCESS
+       if (mp->mnt_kern_flag & MNTK_BACKS_ROOT) {
+               error = EBUSY;
+               goto out;
+       }
+#endif /* CONFIG_IMGSRC_ACCESS */
+
        return (dounmount(mp, flags, 1, ctx));
 
 out:
index c28573b0a794e5411ea879ec9d525f58ad29f9a5..9a00f1027c64ca00e18deda96c020b9af7fd0cb0 100644 (file)
@@ -221,5 +221,18 @@ do_build_all:      build_symbol_sets
 
 do_build_install:      install_symbol_sets 
 
+EXPORTS_FILE_LIST = $(addprefix $(SOURCE)/,$(foreach set,$(SYMBOL_COMPONENT_LIST), $(set).exports $(set).$(ARCH_CONFIG_LC).exports))
+EXPORTS_FILE_LIST_NOSYSTEM60 = $(addprefix $(SOURCE)/,$(foreach set, $(filter-out System6.0,$(SYMBOL_COMPONENT_LIST)), $(set).exports $(set).$(ARCH_CONFIG_LC).exports))
+
+# Does not include "whole-kernel" clients
+build_mach_kernel_exports:
+       $(_v)if [ $(SUPPORT_SYSTEM60_KEXT) -eq 1 ]; then \
+               $(SOURCE)/generate_linker_exports.sh $(OBJPATH)/kernel-kpi.exp \
+                        $(EXPORTS_FILE_LIST) || exit 1; \
+       else \
+               $(SOURCE)/generate_linker_exports.sh $(OBJPATH)/kernel-kpi.exp \
+                        $(EXPORTS_FILE_LIST_NOSYSTEM60) || exit 1; \
+       fi;
+
 include $(MakeInc_rule)
 include $(MakeInc_dir)
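To make the $(addprefix)/$(foreach) expansion above concrete: with an illustrative SYMBOL_COMPONENT_LIST of "System6.0 Unsupported" and ARCH_CONFIG_LC = i386, EXPORTS_FILE_LIST expands to $(SOURCE)/System6.0.exports $(SOURCE)/System6.0.i386.exports $(SOURCE)/Unsupported.exports $(SOURCE)/Unsupported.i386.exports, and EXPORTS_FILE_LIST_NOSYSTEM60 is the same list with the two System6.0 files filtered out. Only "System6.0" here is taken from the makefile; the other set name is hypothetical.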
index 44aab8c5cc8fceb84dfe8569460cf68453a3208f..6b41eba65a100433e5352c819ea602f5d6f8c839 100644 (file)
@@ -1,4 +1,4 @@
-10.2.0
+10.3.0
 
 # The first line of this file contains the master version number for the kernel.
 # All other instances of the kernel version in xnu are derived from this file.
diff --git a/config/generate_linker_exports.sh b/config/generate_linker_exports.sh
new file mode 100755 (executable)
index 0000000..4af69e9
--- /dev/null
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+set -e
+
+if [ $# -lt 2 ]; then
+    echo "Usage: $0 output.exp input1 [input2 ... ]" 1>&2
+    exit 1
+fi
+
+OUTPUT="$1"
+shift
+
+( grep -h -v ":" "$@"; grep -h ":" "$@" | awk -F: '{print $2}' ) | sort -u > "$OUTPUT"
+
+exit 0
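Concretely: if the input exports files contained the hypothetical lines _foo and _bar:_baz, the first grep passes the plain symbol _foo through, the second grep keeps only the aliased line and awk extracts the post-colon field _baz, and sort -u writes the de-duplicated result (_baz, _foo) to output.exp.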
index 60343290091fb97f00a2f1f49bef339df4e4169f..66c178d5fb6387174a40196963d9dba42602dd60 100644 (file)
@@ -482,7 +482,8 @@ IODMACommand::walkAll(UInt8 op)
 
        if (state->fLocalMapper)
        {
-           state->fLocalMapperPageCount = atop_64(round_page(state->fPreparedLength));
+           state->fLocalMapperPageCount = atop_64(round_page(
+                   state->fPreparedLength + ((state->fPreparedOffset + fMDSummary.fPageAlign) & page_mask)));
            state->fLocalMapperPageAlloc = fMapper->iovmAllocDMACommand(this, state->fLocalMapperPageCount);
            state->fMapContig = true;
        }
index fd150a4f64290e91816765fe434a652f47b606ff..c7b4319ed3b49067ab99dc5434066203f4942b4d 100644 (file)
@@ -2950,6 +2950,17 @@ IOReturn IOService::startPowerChange (
                         &powerState, changeFlags);
 #endif
 
+    // Invalidate the last recorded tickle power state when a power transition
+    // is about to occur, and not as a result of a tickle request.
+
+    if ((getPMRequestType() != kIOPMRequestTypeActivityTickle) &&
+        (fActivityTicklePowerState != -1))
+    {
+        IOLockLock(fActivityLock);
+        fActivityTicklePowerState = -1;
+        IOLockUnlock(fActivityLock);
+    }
+
        // Initialize the change note.
 
     fHeadNoteFlags            = changeFlags;
index 4f6fc2bbe0d6bd3b96434801b887c45f9abd15d8..82cd0eeff96d9b313f324392ed2353b27e24481a 100644 (file)
--- a/kgmacros
+++ b/kgmacros
@@ -64,6 +64,7 @@ document kgm
 |     showtaskvme    Display info about the task's vm_map entries
 |     showtaskipc    Display info about the specified task's ipc space
 |     showtaskrights Display info about the task's ipc space entries
+|     showtaskrightsbt Display info about the task's ipc space entries with back traces
 |     showtaskbusyports    Display all of the task's ports with unread messages
 |
 |     showact       Display info about a thread specified by activation
@@ -213,7 +214,8 @@ document kgm
 |     showallgdbcorestacks Corefile equivalent of "showallgdbstacks"
 |     kdp-reenter      Schedule reentry into the debugger and continue.
 |     kdp-reboot       Restart remote target
-|     kdp-version       Get KDP version number
+|     kdp-version      Get KDP version number
+|     kdp-connect      "shorthand" connection macro
 |
 |     zstack           Print zalloc caller stack (zone leak debugging)
 |     findoldest       Find oldest zone leak debugging record
@@ -1136,6 +1138,10 @@ define showipcint
             if $kgm_ie.ie_bits & 0x001f0000
                 set $kgm_name = (($kgm_iindex << 8)|($kgm_ie.ie_bits >> 24))
                 showipceint $kgm_iep $kgm_name
+                if $arg2 != 0 && $kgm_ie.ie_object != 0 && ($kgm_ie.ie_bits & 0x00070000) && ((ipc_port_t) $kgm_ie.ie_object)->ip_callstack[0] != 0
+                    printf "              user bt: "
+                    showportbt $kgm_ie.ie_object $kgm_is.is_task
+                end
             end
             set $kgm_iindex = $kgm_iindex + 1
             set $kgm_iep = &($kgm_is.is_table[$kgm_iindex])
@@ -1151,7 +1157,7 @@ end
 define showipc
     set $kgm_isp = (ipc_space_t)$arg0
     showipcheader
-    showipcint $kgm_isp 0
+    showipcint $kgm_isp 0 0
 end
 document showipc
 Syntax: (gdb) showipc <ipc_space>
@@ -1161,7 +1167,7 @@ end
 define showrights
        set $kgm_isp = (ipc_space_t)$arg0
     showipcheader
-       showipcint $kgm_isp 1
+       showipcint $kgm_isp 1 0
 end
 document showrights
 Syntax: (gdb) showrights <ipc_space>
@@ -1174,7 +1180,7 @@ define showtaskipc
        showtaskheader
     showipcheader
        showtaskint $kgm_taskp
-       showipcint $kgm_taskp->itk_space 0
+       showipcint $kgm_taskp->itk_space 0 0
 end
 document showtaskipc
 Syntax: (gdb) showtaskipc <task>
@@ -1187,13 +1193,25 @@ define showtaskrights
        showtaskheader
     showipcheader
        showtaskint $kgm_taskp
-       showipcint $kgm_taskp->itk_space 1
+       showipcint $kgm_taskp->itk_space 1 0
 end
 document showtaskrights
 Syntax: (gdb) showtaskrights <task>
 | Routine to print info about the ipc rights for a task
 end
 
+define showtaskrightsbt
+       set $kgm_taskp = (task_t)$arg0
+       showtaskheader
+    showipcheader
+       showtaskint $kgm_taskp
+       showipcint $kgm_taskp->itk_space 1 1
+end
+document showtaskrightsbt
+Syntax: (gdb) showtaskrightsbt <task>
+| Routine to print info about the ipc rights for a task with backtraces
+end
+
 define showallipc
     set $kgm_head_taskp = &tasks
     set $kgm_cur_taskp = (struct task *)($kgm_head_taskp->next)
@@ -1201,7 +1219,7 @@ define showallipc
         showtaskheader
         showipcheader
         showtaskint $kgm_cur_taskp
-        showipcint $kgm_cur_taskp->itk_space 0
+        showipcint $kgm_cur_taskp->itk_space 0 0
        set $kgm_cur_taskp = (struct task *)($kgm_cur_taskp->tasks.next)
     end
 end
@@ -1218,7 +1236,7 @@ define showallrights
         showtaskheader
         showipcheader
         showtaskint $kgm_cur_taskp
-        showipcint $kgm_cur_taskp->itk_space 1
+        showipcint $kgm_cur_taskp->itk_space 1 0
        set $kgm_cur_taskp = (struct task *)($kgm_cur_taskp->tasks.next)
     end
 end
@@ -1631,6 +1649,25 @@ define showportmember
     printf "0x%08x\n", $kgm_portp->ip_messages.data.port.msgcount
 end
 
+define showportbt
+    set $kgm_iebt = ((ipc_port_t) $arg0)->ip_callstack
+    set $kgm_iepid = ((ipc_port_t) $arg0)->ip_spares[0]
+    set $kgm_procpid = ((proc_t) (((task_t) $arg1)->bsd_info))->p_pid
+    if $kgm_iebt[0] != 0
+        showptr $kgm_iebt[0]
+        set $kgm_iebt_loop_ctr = 1
+        while ($kgm_iebt_loop_ctr < 16 && $kgm_iebt[$kgm_iebt_loop_ctr])
+            printf " "
+            showptr $kgm_iebt[$kgm_iebt_loop_ctr]
+            set $kgm_iebt_loop_ctr = $kgm_iebt_loop_ctr + 1
+        end
+        if $kgm_iepid != $kgm_procpid
+            printf " (%d)", $kgm_iepid
+        end
+        printf "\n"
+    end
+end
+
 define showportint
     printf "0x%08x  ", $arg0
     set $kgm_portp = (struct ipc_port *)$arg0
@@ -2562,7 +2599,7 @@ define getdumpinfo
        dumpinfoint KDP_DUMPINFO_GETINFO
        set $kgm_dumpinfo = (kdp_dumpinfo_reply_t *) manual_pkt.data
        if $kgm_dumpinfo->type & KDP_DUMPINFO_REBOOT
-                 printf "Sysem will reboot after kernel info gets dumped.\n"
+                 printf "System will reboot after kernel info gets dumped.\n"
        else
                  printf "Sysem will not reboot after kernel info gets dumped.\n"
        end
@@ -10182,3 +10219,17 @@ Syntax:  showallbusyports
 |Routine to print information about all receive rights on the system that
 |have enqueued messages.
 end
+
+define kdp-connect
+    if $argc > 0
+       kdp-reattach $arg0
+    else
+       printf "Attempting to attach to localhost...\n"
+       kdp-reattach localhost
+    end
+end
+
+document kdp-connect
+Syntax: (gdb) kdp-connect <address-of-remote-host>
+| Attach to the machine with the given hostname or IP address, or to 'localhost' if none is given
+end
index 9f41164919163edbcac23146f4fc6899191da70c..acc3e3e98e128f2cd1affa85e865b2fba80c2000 100644 (file)
@@ -3949,21 +3949,6 @@ OSKext::load(
     Boolean              alreadyLoaded                = false;
     OSKext             * lastLoadedKext               = NULL;
 
-    if (!sLoadEnabled) {
-        if (!isLoaded() || (!isStarted() && startOpt != kOSKextExcludeNone) ||
-            (startMatchingOpt != kOSKextExcludeNone)) {
-
-            OSKextLog(this,
-                kOSKextLogErrorLevel |
-                kOSKextLogLoadFlag,
-                "Kext loading is disabled "
-                "(attempt to load/start/start matching for kext %s).",
-                getIdentifierCString());
-        }
-        result = kOSKextReturnDisabled;
-        goto finish;
-    }
-
     if (isLoaded()) {
         alreadyLoaded = true;
         result = kOSReturnSuccess;
@@ -3976,6 +3961,16 @@ OSKext::load(
         goto loaded;
     }
 
+    if (!sLoadEnabled) {
+        OSKextLog(this,
+            kOSKextLogErrorLevel |
+            kOSKextLogLoadFlag,
+            "Kext loading is disabled (attempt to load kext %s).",
+            getIdentifierCString());
+        result = kOSKextReturnDisabled;
+        goto finish;
+    }
+
    /* If we've pushed the next available load tag to the invalid value,
     * we can't load any more kexts.
     */
@@ -4136,9 +4131,7 @@ OSKext::load(
     OSKext::saveLoadedKextPanicList();
 
 loaded:
-   /* This is a bit of a hack, because we shouldn't be handling 
-    * personalities within the load function.
-    */
+
     if (declaresExecutable() && (startOpt == kOSKextExcludeNone)) {
         result = start();
         if (result != kOSReturnSuccess) {
@@ -4152,12 +4145,32 @@ loaded:
     
    /* If not excluding matching, send the personalities to the kernel.
     * This never affects the result of the load operation.
+    * This is a bit of a hack, because we shouldn't be handling 
+    * personalities within the load function.
     */
     if (result == kOSReturnSuccess && startMatchingOpt == kOSKextExcludeNone) {
-        sendPersonalitiesToCatalog(true, personalityNames);
+        result = sendPersonalitiesToCatalog(true, personalityNames);
     }
-    
 finish:
+
+   /* More hack! If the kext doesn't declare an executable, even if we
+    * "loaded" it, we have to remove any personalities naming it, or we'll
+    * never see the registry go quiet. Errors here do not count for the
+    * load operation itself.
+    *
+    * Note that in every other regard it's perfectly ok for a kext to
+    * not declare an executable and serve only as a package for personalities
+    * naming another kext, so we do have to allow such kexts to be "loaded"
+    * so that those other personalities get added & matched.
+    */
+    if (!declaresExecutable()) {
+        OSKextLog(this,
+            kOSKextLogStepLevel | kOSKextLogLoadFlag,
+            "Kext %s has no executable; removing any personalities naming it.",
+            getIdentifierCString());
+        removePersonalitiesFromCatalog();
+    }
+
     if (result != kOSReturnSuccess) {
         OSKextLog(this,
             kOSKextLogErrorLevel |
@@ -4721,6 +4734,16 @@ OSKext::start(bool startDependenciesFlag)
         goto finish;
     }
 
+    if (!sLoadEnabled) {
+        OSKextLog(this,
+            kOSKextLogErrorLevel |
+            kOSKextLogLoadFlag,
+            "Kext loading is disabled (attempt to start kext %s).",
+            getIdentifierCString());
+        result = kOSKextReturnDisabled;
+        goto finish;
+    }
+
     result = validateKextMapping(/* start? */ true);
     if (result != kOSReturnSuccess) {
         goto finish;
@@ -7763,15 +7786,26 @@ finish:
 /*********************************************************************
 Might want to change this to a bool return?
 *********************************************************************/
-void
+OSReturn
 OSKext::sendPersonalitiesToCatalog(
     bool      startMatching,
     OSArray * personalityNames)
 {
-    OSArray      * personalitiesToSend     = NULL;  // must release
-    OSDictionary * kextPersonalities = NULL;  // do not release
+    OSReturn       result              = kOSReturnSuccess;
+    OSArray      * personalitiesToSend = NULL;  // must release
+    OSDictionary * kextPersonalities   = NULL;  // do not release
     int            count, i;
 
+    if (!sLoadEnabled) {
+        OSKextLog(this,
+            kOSKextLogErrorLevel |
+            kOSKextLogLoadFlag,
+            "Kext loading is disabled (attempt to start matching for kext %s).",
+            getIdentifierCString());
+        result = kOSKextReturnDisabled;
+        goto finish;
+    }
+
     if (sSafeBoot && !isLoadableInSafeBoot()) {
         OSKextLog(this,
             kOSKextLogErrorLevel |
@@ -7779,7 +7813,8 @@ OSKext::sendPersonalitiesToCatalog(
             "Kext %s is not loadable during safe boot; "
             "not sending personalities to the IOCatalogue.",
             getIdentifierCString());
-        return;
+        result = kOSKextReturnNotLoadable;
+        goto finish;
     }
 
     if (!personalityNames || !personalityNames->getCount()) {
@@ -7788,10 +7823,12 @@ OSKext::sendPersonalitiesToCatalog(
         kextPersonalities = OSDynamicCast(OSDictionary,
             getPropertyForHostArch(kIOKitPersonalitiesKey));
         if (!kextPersonalities || !kextPersonalities->getCount()) {
+            // not an error
             goto finish;
         }
         personalitiesToSend = OSArray::withCapacity(0);
         if (!personalitiesToSend) {
+            result = kOSKextReturnNoMemory;
             goto finish;
         }
         count = personalityNames->getCount();
@@ -7824,10 +7861,12 @@ finish:
     if (personalitiesToSend) {
         personalitiesToSend->release();
     }
-    return;
+    return result;
 }
 
 /*********************************************************************
+* xxx - We should allow removing the kext's declared personalities,
+* xxx - even with other bundle identifiers.
 *********************************************************************/
 void
 OSKext::removePersonalitiesFromCatalog(void)
index 2ca7aafb210f10b3cecadc0553acaf2d8ddc6a33..b303c36038c2f0814cd0e2c47ae80d65f00d5973 100644 (file)
@@ -67,3 +67,6 @@ options               IPSEC                   # IP security   # <ipsec>
 
 options                CONFIG_KXLD             # kxld/runtime linking of kexts # <config_kxld>
 
+# secure_kernel - secure kernel from user programs
+options     SECURE_KERNEL       # <secure_kernel> 
+
index c763d0ac4cc89129af3780f36b1cb55d6941a0e7..15f992d67095b12d51610163ad7c4e0167ba0d85 100644 (file)
@@ -68,6 +68,7 @@ libkern/stack_protector.c       standard
 libkern/kxld/kxld.c             optional config_kxld
 libkern/kxld/kxld_array.c       optional config_kxld
 libkern/kxld/kxld_copyright.c   optional config_kxld
+libkern/kxld/kxld_demangle.c    optional config_kxld
 libkern/kxld/kxld_dict.c        optional config_kxld
 libkern/kxld/kxld_kext.c        optional config_kxld
 libkern/kxld/kxld_reloc.c       optional config_kxld
index 84412e08fa3a46b759050abdc94ae6a5a081def9..9bc3566c6b523949ddf3457c501a6798f7ed12df 100644 (file)
@@ -59,7 +59,7 @@ CFLAGS=-std=c99 -Wall -Wextra -Werror -pedantic -Wformat=2 -Wcast-align \
        -isysroot $(SDKROOT)
 LDFLAGS=$(ARCHS) -dynamiclib -install_name $(LIBKXLDNAME) \
        -compatibility_version $(COMPATIBILITY_VERSION) \
-       -current_version $(CURRENT_VERSION) -isysroot $(SDKROOT)
+       -current_version $(CURRENT_VERSION) -isysroot $(SDKROOT) -lstdc++
 INCLUDES=-I$(HDRSRC) $(INCFLAGS_EXTERN)
 
 # Tools
@@ -74,9 +74,9 @@ endif
 
 # Files
 HDR_NAMES=kxld.h kxld_types.h
-OBJ_NAMES=kxld.o kxld_array.o kxld_copyright.o kxld_dict.o kxld_kext.o kxld_reloc.o \
-       kxld_sect.o kxld_seg.o kxld_sym.o kxld_state.o kxld_symtab.o kxld_util.o \
-       kxld_uuid.o kxld_vtable.o
+OBJ_NAMES=kxld.o kxld_array.o kxld_copyright.o kxld_demangle.o kxld_dict.o \
+       kxld_kext.o kxld_reloc.o kxld_sect.o kxld_seg.o kxld_sym.o kxld_state.o \
+       kxld_symtab.o kxld_util.o kxld_uuid.o kxld_vtable.o
 HDRS=$(addprefix $(HDRSRC)/, $(HDR_NAMES))
 OBJS=$(addprefix $(OBJROOT)/, $(OBJ_NAMES))
 
index b04a6045ac1b9eee216bab7f7448c04b03565394..9720f3d08967e0a190bad159a5291cfd38630416 100644 (file)
@@ -86,8 +86,8 @@ kxld_array_init(KXLDArray *array, size_t itemsize, u_int nitems)
          */
         if (array->maxitems < nitems) {
             STAILQ_FOREACH_SAFE(srcpool, &array->pools, entries, tmp) {
-                STAILQ_INSERT_TAIL(&srcpools, srcpool, entries);
                 STAILQ_REMOVE(&array->pools, srcpool, kxld_array_pool, entries);
+                STAILQ_INSERT_TAIL(&srcpools, srcpool, entries);
             }
             srcpool_capacity = array->pool_capacity;
             bzero(array, sizeof(*array));
diff --git a/libkern/kxld/kxld_demangle.c b/libkern/kxld/kxld_demangle.c
new file mode 100644 (file)
index 0000000..98ca4d5
--- /dev/null
@@ -0,0 +1,46 @@
+#if !KERNEL
+
+#include <stdlib.h>
+
+/* This demangler is part of the C++ ABI.  We don't include it directly from
+ * <cxxabi.h> so that we can avoid using C++ in the kernel linker.
+ */
+extern char * 
+__cxa_demangle(const char* __mangled_name, char* __output_buffer,
+               size_t* __length, int* __status);
+
+#endif /* !KERNEL */
+
+#include "kxld_demangle.h"
+
+/*******************************************************************************
+*******************************************************************************/
+const char *
+kxld_demangle(const char *str, char **buffer __unused, size_t *length __unused)
+{
+#if KERNEL
+    return str;
+#else
+    const char *rval = NULL;
+    char *demangled = NULL;
+    int status;
+
+    if (!str) goto finish;
+
+    rval = str;
+
+    if (!buffer || !length) goto finish;
+
+    /* Symbol names in the symbol table have an extra '_' prepended to them,
+     * so we skip the first character to make the demangler happy.
+     */
+    demangled = __cxa_demangle(str+1, *buffer, length, &status);
+    if (!demangled || status) goto finish;
+    
+    *buffer = demangled;
+    rval = demangled;
+finish:
+    return rval;
+#endif
+}
+
diff --git a/libkern/kxld/kxld_demangle.h b/libkern/kxld/kxld_demangle.h
new file mode 100644 (file)
index 0000000..1fee331
--- /dev/null
@@ -0,0 +1,24 @@
+#ifndef _KXLD_DEMANGLE_H_
+#define _KXLD_DEMANGLE_H_
+
+#include <sys/types.h>
+
+/* @function kxld_demangle
+ *
+ * @abstract Demangles C++ symbols.
+ * 
+ * @param str           The C-string to be demangled.
+ * @param buffer        A pointer to a character buffer for storing the result.
+ *                      If NULL, a buffer will be malloc'd and stored here.
+ *                      If the buffer is not large enough, it will be realloc'd.
+ *
+ * @param length        The length of the buffer.
+ * 
+ * @result              If the input string could be demangled, it returns the
+ *                      demangled string.  Otherwise, returns the input string.
+ * 
+ */
+const char * kxld_demangle(const char *str, char **buffer, size_t *length)
+    __attribute__((pure, nonnull, visibility("hidden")));
+
+#endif /* !_KXLD_DEMANGLE_H_ */
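A userspace usage sketch for the interface documented above; the mangled name is an illustrative OSKext symbol, and free() is appropriate here because __cxa_demangle's buffer is malloc'd:

    #include <stdio.h>
    #include <stdlib.h>
    #include "kxld_demangle.h"

    int main(void)
    {
        char *buf = NULL;   /* NULL lets the demangler allocate the buffer */
        size_t len = 0;

        /* Symbol-table names carry an extra leading '_', skipped internally. */
        const char *pretty = kxld_demangle("__ZN6OSKext4loadEv", &buf, &len);
        printf("%s\n", pretty);  /* "OSKext::load()" on success, else the input */

        free(buf);
        return 0;
    }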
index 7b5623003090603a876d7b10bcaa0451ef41a581..a5520711e8b41d018cb7d1f530212065eb8d2d34 100644 (file)
@@ -51,6 +51,7 @@
 #define DEBUG_ASSERT_COMPONENT_NAME_STRING "kxld"
 #include <AssertMacros.h>
 
+#include "kxld_demangle.h"
 #include "kxld_dict.h"
 #include "kxld_kext.h"
 #include "kxld_reloc.h"
@@ -1096,6 +1097,10 @@ create_vtables(KXLDKext *kext)
     char class_name[KXLD_MAX_NAME_LEN];
     char vtable_name[KXLD_MAX_NAME_LEN];
     char meta_vtable_name[KXLD_MAX_NAME_LEN];
+    char *demangled_name1 = NULL;
+    char *demangled_name2 = NULL;
+    size_t demangled_length1 = 0;
+    size_t demangled_length2 = 0;
     u_int i = 0;
     u_int nvtables = 0;
 
@@ -1161,7 +1166,10 @@ create_vtables(KXLDKext *kext)
                 } else {
                     kxld_log(kKxldLogPatching, kKxldLogErr, 
                         "Warning: " kKxldLogMissingVtable, 
-                        meta_vtable_name, class_name);
+                        kxld_demangle(meta_vtable_name, &demangled_name1, 
+                            &demangled_length1), 
+                        kxld_demangle(class_name, &demangled_name2, 
+                            &demangled_length2));
                     kxld_array_resize(&kext->vtables, --nvtables);
                 }
             }
@@ -1231,6 +1239,10 @@ create_vtables(KXLDKext *kext)
     rval = KERN_SUCCESS;
 
 finish:
+
+    if (demangled_name1) kxld_free(demangled_name1, demangled_length1);
+    if (demangled_name2) kxld_free(demangled_name2, demangled_length2);
+
     return rval;
 }
 
@@ -1950,6 +1962,8 @@ resolve_symbols(KXLDKext *kext, KXLDDict *defined_symbols,
     boolean_t tests_for_weak = FALSE;
     boolean_t error = FALSE;
     boolean_t warning = FALSE;
+    char *demangled_name = NULL;
+    size_t demangled_length = 0;
 
     check(kext);
     check(defined_symbols);
@@ -1981,8 +1995,8 @@ resolve_symbols(KXLDKext *kext, KXLDDict *defined_symbols,
                     "The following symbols were defined more than once:");
             }
 
-            kxld_log(kKxldLogLinking, kKxldLogErr,
-                "\t%s: %p - %p", sym->name, 
+            kxld_log(kKxldLogLinking, kKxldLogErr, "\t%s: %p - %p", 
+                kxld_demangle(sym->name, &demangled_name, &demangled_length),
                 (void *) (uintptr_t) sym->link_addr, 
                 (void *) (uintptr_t) addr);
         }
@@ -2011,7 +2025,8 @@ resolve_symbols(KXLDKext *kext, KXLDDict *defined_symbols,
                          "The following are common symbols:");
                 }
             }
-            kxld_log(kKxldLogLinking, kKxldLogErr, "\t%s", sym->name);
+            kxld_log(kKxldLogLinking, kKxldLogErr, "\t%s", 
+                kxld_demangle(sym->name, &demangled_name, &demangled_length));
 
         } else {
 
@@ -2045,7 +2060,8 @@ resolve_symbols(KXLDKext *kext, KXLDDict *defined_symbols,
 
                 if (obsolete_symbols && kxld_dict_find(obsolete_symbols, name)) {
                     kxld_log(kKxldLogLinking, kKxldLogWarn, 
-                        "This kext uses obsolete symbol %s.", name);
+                        "This kext uses obsolete symbol %s.", 
+                        kxld_demangle(name, &demangled_name, &demangled_length));
                 }
 
             } else if (kext->link_type == KXLD_LINK_PSEUDO_KEXT) {
@@ -2058,7 +2074,8 @@ resolve_symbols(KXLDKext *kext, KXLDDict *defined_symbols,
                         "This symbol set has the following unresolved symbols:");
                     warning = TRUE;
                 }
-                kxld_log(kKxldLogLinking, kKxldLogErr, "\t%s", sym->name);
+                kxld_log(kKxldLogLinking, kKxldLogErr, "\t%s", 
+                    kxld_demangle(sym->name, &demangled_name, &demangled_length));
                 kxld_sym_delete(sym);
 
             } else if (kxld_sym_is_weak(sym)) {
@@ -2092,6 +2109,7 @@ resolve_symbols(KXLDKext *kext, KXLDDict *defined_symbols,
     rval = KERN_SUCCESS;
 
 finish:
+    if (demangled_name) kxld_free(demangled_name, demangled_length);
 
     return rval;
 }
@@ -2148,6 +2166,10 @@ patch_vtables(KXLDKext *kext, KXLDDict *patched_vtables,
     char vtable_name[KXLD_MAX_NAME_LEN];
     char super_vtable_name[KXLD_MAX_NAME_LEN];
     char final_sym_name[KXLD_MAX_NAME_LEN];
+    char *demangled_name1 = NULL;
+    char *demangled_name2 = NULL;
+    size_t demangled_length1 = 0;
+    size_t demangled_length2 = 0;
     size_t len = 0;
     u_int nvtables = 0;
     u_int npatched = 0;
@@ -2204,7 +2226,11 @@ patch_vtables(KXLDKext *kext, KXLDDict *patched_vtables,
 
                 if (failure) {
                     kxld_log(kKxldLogPatching, kKxldLogErr, 
-                        "\t%s (super vtable %s)", vtable_name, super_vtable_name);
+                        "\t'%s' (super vtable '%s')", 
+                        kxld_demangle(vtable_name, &demangled_name1, 
+                            &demangled_length1), 
+                        kxld_demangle(super_vtable_name, &demangled_name2, 
+                            &demangled_length2));
                     continue;
                 }
 
@@ -2228,8 +2254,11 @@ patch_vtables(KXLDKext *kext, KXLDDict *patched_vtables,
                 require_action(!final_sym, finish, 
                     rval=KERN_FAILURE;
                     kxld_log(kKxldLogPatching, kKxldLogErr, 
-                        "Class %s is a subclass of final class %s.",
-                        class_name, super_class_name));
+                        "Class '%s' is a subclass of final class '%s'.",
+                        kxld_demangle(class_name, &demangled_name1, 
+                            &demangled_length1), 
+                        kxld_demangle(super_class_name, &demangled_name2, 
+                            &demangled_length2)));
 
                 /* Patch the class's vtable */
                 rval = kxld_vtable_patch(vtable, super_vtable, kext->symtab,
@@ -2297,6 +2326,9 @@ patch_vtables(KXLDKext *kext, KXLDDict *patched_vtables,
 
     rval = KERN_SUCCESS;
 finish:
+    if (demangled_name1) kxld_free(demangled_name1, demangled_length1);
+    if (demangled_name2) kxld_free(demangled_name2, demangled_length2);
+
     return rval;
 }
 
@@ -2309,6 +2341,8 @@ validate_symbols(KXLDKext *kext)
     KXLDSymtabIterator iter;
     KXLDSym *sym = NULL;
     u_int error = FALSE;
+    char *demangled_name = NULL;
+    size_t demangled_length = 0;
     
     /* Check for any unresolved symbols */
     kxld_symtab_iterator_init(&iter, kext->symtab, kxld_sym_is_unresolved, FALSE);
@@ -2318,13 +2352,15 @@ validate_symbols(KXLDKext *kext)
             kxld_log(kKxldLogLinking, kKxldLogErr, 
                 "The following symbols are unresolved for this kext:");
         }
-        kxld_log(kKxldLogLinking, kKxldLogErr, "\t%s", sym->name);
+        kxld_log(kKxldLogLinking, kKxldLogErr, "\t%s", 
+            kxld_demangle(sym->name, &demangled_name, &demangled_length));
     }
     require_noerr_action(error, finish, rval=KERN_FAILURE);
 
     rval = KERN_SUCCESS;
 
 finish:
+    if (demangled_name) kxld_free(demangled_name, demangled_length);
     return rval;
 }
 
index 9c387e670b0579b8e5a28a1d10f7b1f0a419dcbc..3392b4a74a2ef205ca8ff0aa533f293fdc0c9724 100644 (file)
@@ -115,17 +115,17 @@ void kxld_log(KXLDLogSubsystem subsystem, KXLDLogLevel level,
 #define kKxldLogArchNotSupported        "The target architecture (cputype 0x%x) is not supported by kxld."
 #define kKxldLogArchNotFound            "The kext does not contain a fat slice for the target architecture."
 #define kKxldLogFiletypeNotSupported    "The Mach-O filetype 0x%x is not supported on the target architecture."
-#define kKxldLogTruncatedMachO          "The Mach-O file has been truncated.  Make sure the Mach-O header structures are correct."
+#define kKxldLogTruncatedMachO          "The Mach-O file has been truncated. Make sure the Mach-O header structures are correct."
 #define kKxldLogMalformedMachO          "The Mach-O file is malformed: "
-#define kKxldLogMalformedVTable         "The vtable %s is malformed.  Make sure your kext has been built against the correct headers."
-#define kKxldLogMissingVtable           "Cannot find the vtable %s for class %s.  This vtable symbol is required for binary compatibility, and it may have been stripped."
-#define kKxldLogParentOutOfDate         "The super class vtable %s for vtable %s is out of date.  Make sure your kext has been built against the correct headers."
+#define kKxldLogMalformedVTable         "The vtable '%s' is malformed. Make sure your kext has been built against the correct headers."
+#define kKxldLogMissingVtable           "Cannot find the vtable '%s' for class '%s'. This vtable symbol is required for binary compatibility, and it may have been stripped."
+#define kKxldLogParentOutOfDate         "The super class vtable '%s' for vtable '%s' is out of date. Make sure your kext has been built against the correct headers."
 #define kKxldLogNoKmodInfo              "The kext is missing its kmod_info structure."
 #define kKxldLogInvalidSectReloc        "Relocation entry %u from section %s,%s cannot be processed."
 #define kKxldLogInvalidExtReloc         "External relocation entry %u cannot be processed."
 #define kKxldLogInvalidIntReloc         "Internal relocation entry %u cannot be processed."
-#define kKxldLogRelocationOverflow      "A relocation entry has overflowed.  The kext may be too far from one " \
-                                        "of its dependencies.  Check your kext's load address."
+#define kKxldLogRelocationOverflow      "A relocation entry has overflowed. The kext may be too far from one " \
+                                        "of its dependencies. Check your kext's load address."
 
 /*******************************************************************************
 * Allocators 
index 78e647e6bbff0651c04afd554495b39889b81261..208c030d98921b321fa4ed8a9660538cdc06dc9a 100644 (file)
@@ -32,6 +32,7 @@
 #define DEBUG_ASSERT_COMPONENT_NAME_STRING "kxld"
 #include <AssertMacros.h>
 
+#include "kxld_demangle.h"
 #include "kxld_reloc.h"
 #include "kxld_sect.h"
 #include "kxld_state.h"
@@ -73,6 +74,8 @@ kxld_vtable_init_from_kernel_macho(KXLDVTable *vtable, const KXLDSym *sym,
     const KXLDRelocator *relocator)
 {
     kern_return_t rval = KERN_FAILURE;
+    char *demangled_name = NULL;
+    size_t demangled_length = 0;
 
     check(vtable);
     check(sym);
@@ -86,7 +89,8 @@ kxld_vtable_init_from_kernel_macho(KXLDVTable *vtable, const KXLDSym *sym,
     require_action(kxld_sect_get_num_relocs(sect) == 0, finish,
         rval=KERN_FAILURE;
         kxld_log(kKxldLogPatching, kKxldLogErr, 
-            kKxldLogMalformedVTable, vtable->name));
+            kKxldLogMalformedVTable,
+            kxld_demangle(vtable->name, &demangled_name, &demangled_length)));
 
     rval = init_by_entries(vtable, symtab, relocator);
     require_noerr(rval, finish);
@@ -96,8 +100,8 @@ kxld_vtable_init_from_kernel_macho(KXLDVTable *vtable, const KXLDSym *sym,
     rval = KERN_SUCCESS;
 
 finish:
-
     if (rval) kxld_vtable_deinit(vtable);
+    if (demangled_name) kxld_free(demangled_name, demangled_length);
 
     return rval;
 }
@@ -110,6 +114,8 @@ kxld_vtable_init_from_object_macho(KXLDVTable *vtable, const KXLDSym *sym,
     const KXLDRelocator *relocator)
 {
     kern_return_t rval = KERN_FAILURE;
+    char *demangled_name = NULL;
+    size_t demangled_length = 0;
 
     check(vtable);
     check(sym);
@@ -123,7 +129,8 @@ kxld_vtable_init_from_object_macho(KXLDVTable *vtable, const KXLDSym *sym,
     require_action(kxld_sect_get_num_relocs(sect) > 0, finish,
         rval=KERN_FAILURE;
         kxld_log(kKxldLogPatching, kKxldLogErr, 
-            kKxldLogMalformedVTable, vtable->name));
+            kKxldLogMalformedVTable, 
+            kxld_demangle(vtable->name, &demangled_name, &demangled_length)));
 
     rval = init_by_relocs(vtable, sym, sect, symtab, relocator);
     require_noerr(rval, finish);
@@ -131,8 +138,8 @@ kxld_vtable_init_from_object_macho(KXLDVTable *vtable, const KXLDSym *sym,
     rval = KERN_SUCCESS;
 
 finish:
-
     if (rval) kxld_vtable_deinit(vtable);
+    if (demangled_name) kxld_free(demangled_name, demangled_length);
 
     return rval;
 }
@@ -145,6 +152,8 @@ kxld_vtable_init_from_final_macho(KXLDVTable *vtable, const KXLDSym *sym,
     const KXLDRelocator *relocator, const KXLDArray *relocs)
 {
     kern_return_t rval = KERN_FAILURE;
+    char *demangled_name = NULL;
+    size_t demangled_length = 0;
 
     check(vtable);
     check(sym);
@@ -158,7 +167,8 @@ kxld_vtable_init_from_final_macho(KXLDVTable *vtable, const KXLDSym *sym,
     require_action(kxld_sect_get_num_relocs(sect) == 0, finish,
         rval=KERN_FAILURE;
         kxld_log(kKxldLogPatching, kKxldLogErr, 
-            kKxldLogMalformedVTable, vtable->name));
+            kKxldLogMalformedVTable, 
+            kxld_demangle(vtable->name, &demangled_name, &demangled_length)));
 
     rval = init_by_entries_and_relocs(vtable, sym, symtab,
         relocator, relocs);
@@ -168,6 +178,7 @@ kxld_vtable_init_from_final_macho(KXLDVTable *vtable, const KXLDSym *sym,
 
 finish:
     if (rval) kxld_vtable_deinit(vtable);
+    if (demangled_name) kxld_free(demangled_name, demangled_length);
 
     return rval;
 }
@@ -499,6 +510,8 @@ init_by_entries_and_relocs(KXLDVTable *vtable, const KXLDSym *sym,
     kxld_addr_t entry_offset = 0;
     u_int nentries = 0;
     u_int i = 0;
+    char *demangled_name1 = NULL;
+    size_t demangled_length1 = 0;
 
     check(vtable);
     check(sym);
@@ -573,7 +586,9 @@ init_by_entries_and_relocs(KXLDVTable *vtable, const KXLDSym *sym,
             require_action(reloc, finish,
                 rval=KERN_FAILURE;
                 kxld_log(kKxldLogPatching, kKxldLogErr, 
-                    kKxldLogMalformedVTable, vtable->name));
+                    kKxldLogMalformedVTable, 
+                    kxld_demangle(vtable->name, &demangled_name1, 
+                        &demangled_length1)));
         
             tmpsym = kxld_reloc_get_symbol(relocator, reloc, 
                 /* data */ NULL, symtab);
@@ -630,6 +645,12 @@ kxld_vtable_patch(KXLDVTable *vtable, const KXLDVTable *super_vtable,
     KXLDSym *sym = NULL;
     u_int symindex = 0;
     u_int i = 0;
+    char *demangled_name1 = NULL;
+    char *demangled_name2 = NULL;
+    char *demangled_name3 = NULL;
+    size_t demangled_length1 = 0;
+    size_t demangled_length2 = 0;
+    size_t demangled_length3 = 0;
 
     check(vtable);
     check(super_vtable);
@@ -637,8 +658,8 @@ kxld_vtable_patch(KXLDVTable *vtable, const KXLDVTable *super_vtable,
     require_action(!vtable->is_patched, finish, rval=KERN_SUCCESS);
     require_action(vtable->entries.nitems >= super_vtable->entries.nitems, finish,
         rval=KERN_FAILURE;
-        kxld_log(kKxldLogPatching, kKxldLogErr, 
-            kKxldLogMalformedVTable, vtable->name));
+        kxld_log(kKxldLogPatching, kKxldLogErr, kKxldLogMalformedVTable, 
+            kxld_demangle(vtable->name, &demangled_name1, &demangled_length1)));
 
     for (i = 0; i < super_vtable->entries.nitems; ++i) {
         child_entry = kxld_array_get_item(&vtable->entries, i);
@@ -688,7 +709,11 @@ kxld_vtable_patch(KXLDVTable *vtable, const KXLDVTable *super_vtable,
         require_action(!kxld_sym_name_is_padslot(parent_entry->patched.name),
             finish, rval=KERN_FAILURE;
             kxld_log(kKxldLogPatching, kKxldLogErr, 
-                kKxldLogParentOutOfDate, super_vtable->name, vtable->name));
+                kKxldLogParentOutOfDate, 
+                kxld_demangle(super_vtable->name, &demangled_name1, 
+                    &demangled_length1), 
+                kxld_demangle(vtable->name, &demangled_name2, 
+                    &demangled_length2)));
 
 #if KXLD_USER_OR_STRICT_PATCHING
         /* 5) If we are doing strict patching, we prevent kexts from declaring
@@ -748,8 +773,11 @@ kxld_vtable_patch(KXLDVTable *vtable, const KXLDVTable *super_vtable,
         require_noerr(rval, finish);
 
         kxld_log(kKxldLogPatching, kKxldLogDetail,
-            "In vtable %s, patching %s with %s.", 
-            vtable->name, child_entry->unpatched.sym->name, sym->name);
+            "In vtable '%s', patching '%s' with '%s'.", 
+            kxld_demangle(vtable->name, &demangled_name1, &demangled_length1),
+            kxld_demangle(child_entry->unpatched.sym->name, 
+                &demangled_name2, &demangled_length2), 
+            kxld_demangle(sym->name, &demangled_name3, &demangled_length3));
 
         kxld_sym_patch(child_entry->unpatched.sym);
         child_entry->unpatched.sym = sym;
@@ -779,6 +807,10 @@ kxld_vtable_patch(KXLDVTable *vtable, const KXLDVTable *super_vtable,
     rval = KERN_SUCCESS;
 
 finish:
+    if (demangled_name1) kxld_free(demangled_name1, demangled_length1);
+    if (demangled_name2) kxld_free(demangled_name2, demangled_length2);
+    if (demangled_name3) kxld_free(demangled_name3, demangled_length3);
+    
     return rval;
 }
 
index d8e157483531fdf2e43960cce9a5f12e48d21640..36d9127e29c393ddab5bbdb5ccc14f7ca219f02e 100644 (file)
@@ -49,7 +49,7 @@ extern "C" {
  * reading and updating of values.
  */
  
-#if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm__)
 
 /*!
  * @function OSCompareAndSwap64
index 14337f3ab025386dd55e41bd2ac05d9a201e45f7..312d53993e761251522be754599d714fd252bba4 100644 (file)
@@ -399,7 +399,7 @@ private:
 
     static  void  sendAllKextPersonalitiesToCatalog(
         bool startMatching = false);
-    virtual void  sendPersonalitiesToCatalog(
+    virtual OSReturn  sendPersonalitiesToCatalog(
         bool      startMatching    = false,
         OSArray * personalityNames = NULL);
     
index e1fc062e1853cf35a00b2b837b5c4958ece649d8..86238fc35a48b39f2f39c26af44379668f16434c 100644 (file)
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
+#include <stdint.h> // For uintptr_t.
 #include <string.h>
 #include <libkern/mkext.h>
 
+
 #define BASE 65521L /* largest prime smaller than 65536 */
-#define NMAX 5000  
-// NMAX (was 5521) the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
+#define NMAX 5552  // the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
 
 #define DO1(buf,i)  {s1 += buf[i]; s2 += s1;}
 #define DO2(buf,i)  DO1(buf,i); DO1(buf,i+1);
@@ -45,6 +46,23 @@ mkext_adler32(uint8_t *buf, int32_t len)
     unsigned long s2 = 0; // (adler >> 16) & 0xffff;
     int k;
 
+#if defined _ARM_ARCH_6
+
+       /* align buf to 16-byte boundary */
+    while ((((uintptr_t)buf)&15)&&(len>0)) { /* not on a 16-byte boundary */
+        len--;
+        s1 += *buf++;
+        s2 += s1;
+        if (s1 >= BASE) s1 -= BASE;
+    }
+       s2 %= BASE;
+
+       if (len>=16) {
+               return adler32_vec(s1, s2, buf, len);
+       }
+
+#endif
+
     while (len > 0) {
         k = len < NMAX ? len : NMAX;
         len -= k;
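The new bound can be sanity-checked directly against the inequality in the comment above; a standalone host-side check (not part of this commit):

    #include <stdio.h>
    #include <stdint.h>

    /* Verify 5552 is the largest n with
     * 255*n*(n+1)/2 + (n+1)*(BASE-1) <= 2^32 - 1, BASE = 65521. */
    int main(void)
    {
        const uint64_t base = 65521, limit = 0xFFFFFFFFULL;
        for (uint64_t n = 5550; n <= 5554; n++) {
            uint64_t worst = 255 * n * (n + 1) / 2 + (n + 1) * (base - 1);
            printf("n=%llu worst=%llu %s\n",
                   (unsigned long long)n, (unsigned long long)worst,
                   worst <= limit ? "fits" : "overflows");
        }
        return 0;   /* "fits" through n=5552, "overflows" from n=5553 */
    }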
index c94fde18702ee0957c3b2fbe020d00c01b29f177..bf0d9723ac2e275dee4a669d6b521b33184c6049 100644 (file)
@@ -32,6 +32,9 @@
 
 /* @(#) $Id$ */
 
+#include <stdint.h> // For uintptr_t.
+
+
 #define ZLIB_INTERNAL
 #if KERNEL
     #include <libkern/zlib.h>
     #include "zlib.h"
 #endif /* KERNEL */
 
+#if defined _ARM_ARCH_6
+       extern uLong adler32_vec(uLong adler, uLong sum2, const Bytef *buf, uInt len);
+#endif
+
 #define BASE 65521UL    /* largest prime smaller than 65536 */
 #define NMAX 5552
 /* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
@@ -91,7 +98,9 @@ uLong ZEXPORT adler32(adler, buf, len)
     uInt len;
 {
     unsigned long sum2;
+#if !defined _ARM_ARCH_6
     unsigned n;
+#endif
 
     /* split Adler-32 into component sums */
     sum2 = (adler >> 16) & 0xffff;
@@ -124,6 +133,20 @@ uLong ZEXPORT adler32(adler, buf, len)
         return adler | (sum2 << 16);
     }
 
+#if defined _ARM_ARCH_6
+    /* align buf to 16-byte boundary */
+    while (((uintptr_t)buf)&15) { /* not on a 16-byte boundary */
+        len--;
+        adler += *buf++;
+        sum2 += adler;
+        if (adler >= BASE) adler -= BASE;
+        MOD4(sum2);             /* only added so many BASE's */
+    }
+
+    return adler32_vec(adler, sum2, buf, len);      // armv7 neon vectorized implementation
+
+#else   //  _ARM_ARCH_6
+
     /* do length NMAX blocks -- requires just one modulo operation */
     while (len >= NMAX) {
         len -= NMAX;
@@ -153,6 +176,8 @@ uLong ZEXPORT adler32(adler, buf, len)
 
     /* return recombined sums */
     return adler | (sum2 << 16);
+
+#endif  // _ARM_ARCH_6
 }
 
 /* ========================================================================= */
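For reference, the calling convention is unchanged by the vector path; the canonical zlib idiom still applies:

    #include <zlib.h>

    /* Seed with adler32(0, Z_NULL, 0), then feed data incrementally;
     * the armv7 NEON branch above is transparent to callers. */
    uLong checksum(const Bytef *buf, uInt len)
    {
        uLong a = adler32(0L, Z_NULL, 0);
        return adler32(a, buf, len);
    }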
diff --git a/libkern/zlib/arm/adler32vec.s b/libkern/zlib/arm/adler32vec.s
new file mode 100644 (file)
index 0000000..3af072c
--- /dev/null
@@ -0,0 +1,428 @@
+#include <arm/arch.h>
+
+#define BASE 65521         /* largest prime smaller than 65536 */
+#define NMAX 5552              /* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
+
+// Note: buf should have been 16-byte aligned in the caller function.
+
+// uLong adler32_vec(unsigned int adler, unsigned int sum2, const Bytef* buf, int len) {
+//    unsigned n;
+//    while (len >= NMAX) {
+//        len -= NMAX;
+//        n = NMAX / 16;          /* NMAX is divisible by 16 */
+//        do {
+//            DO16(buf);          /* 16 sums unrolled */
+//            buf += 16;
+//        } while (--n);
+//        MOD(adler);
+//        MOD(sum2);
+//    }
+//    if (len) {                  /* avoid modulos if none remaining */
+//        while (len >= 16) {
+//            len -= 16;
+//            DO16(buf);
+//            buf += 16;
+//        }
+//        while (len--) {
+//            adler += *buf++;
+//            sum2 += adler;
+//        }
+//        MOD(adler);
+//        MOD(sum2);
+//    }
+//    return adler | (sum2 << 16);             /* return recombined sums */
+// }
+
+
+/* 
+       DO16 vectorization:
+       given initial unsigned int sum2 and adler, and a new set of 16 input bytes (x[0:15]), it can be shown that
+       sum2  += (16*adler + 16*x[0] + 15*x[1] + ... + 1*x[15]);
+       adler += (x[0] + x[1] + ... + x[15]);
+
+       therefore, this is what can be done to vectorize the above computation
+       1. 16-byte aligned vector load into q2 (x[0:x15])
+       2. sum2 += (adler<<4);
+       3. vmull.u8 (q9,q8),q2,d2 where d2 = (1,1,1,1...,1), (q9,q8) : 16 16-bit elements x[0:15]
+       4. vmull.u8 (q11,q10),q2,q0 where q0 = (1,2,3,4...,16), (q11,q10) : 16 16-bit elements (16:1)*x[0:15]
+       5. parallel add (with one expansion to 32-bit) (q9,q8) and (q11,q10) all the way to accumulate into adler and sum2
+
+       In this revision, whenever possible, 2 DO16 loops are combined into a DO32 loop.
+       1. 32-byte aligned vector load into q2,q14 (x[0:x31])
+    2. sum2 += (adler<<5);
+    3. vmull.u8 (4 q registers),(q2,q14),d2 where d2 = (1,1,1,1...,1), (4 q registers) : 32 16-bit elements x[0:31]
+       4. vmull.u8 (4 q registers),(q2,q14),(q0,q15) where q0 = (1,...,32), (4 q regs) : 32 16-bit elements (32:1)*x[0:31]
+    5. parallel add (with one expansion to 32-bit) the pair of (4 q regs) all the way to accumulate into adler and sum2
+
+       This change improves performance by ~0.55 cycles per uncompressed byte on ARM Cortex-A8.
+
+*/
+
+/*
+       MOD implementation:
+       adler%BASE = adler - floor(adler*(1/BASE))*BASE; where (1/BASE) = 0x80078071 in Q47
+       1. vmull.u32   q2,(adler,sum2),(1/BASE)         // *(1/BASE) in Q47
+    2. vshr.u64    q2,q2,#47                                   // floor function
+    3. vpadd.u32   d4,d4,d5                                            // merge into a double word in d4
+    4. vmls.u32    (adler,sum2),d4,d3[0]        // (adler,sum2) -= floor[(adler,sum2)/BASE]*BASE
+        
+*/
+
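The Q47 constant can be checked on the host. Below is a sketch of the identity the MOD macro depends on; 0x80078071 is the unsigned form of the -2146992015 literal stored in the coeffs table further down:

    #include <stdint.h>
    #include <stdio.h>

    /* Check: x % 65521 == x - ((x * 0x80078071) >> 47) * 65521
     * for 32-bit x (sampled with a prime stride for speed). */
    int main(void)
    {
        const uint32_t base = 65521;
        const uint64_t recip = 0x80078071ULL;  /* ~2^47 / base, rounded up */
        for (uint64_t x = 0; x <= 0xFFFFFFFFULL; x += 104729) {
            uint32_t q = (uint32_t)((x * recip) >> 47);
            if ((uint32_t)x - q * base != (uint32_t)(x % base)) {
                printf("mismatch at %llu\n", (unsigned long long)x);
                return 1;
            }
        }
        printf("Q47 reciprocal agrees with %% over the sampled range\n");
        return 0;
    }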
+#if defined _ARM_ARCH_6                        // this file would be used only for armv6 or above
+
+
+       .text
+       .align 2
+       .globl _adler32_vec
+_adler32_vec:
+#if (!KERNEL_SUPPORT_NEON) || (!defined _ARM_ARCH_7)   // for armv6 or armv7 without neon support
+
+
+       #define adler                   r0
+       #define sum2                    r1
+       #define buf                             r2
+       #define len                             r3      
+       #define one_by_base             r4
+       #define base                    r5
+       #define nmax                    r6
+       #define t                               r12
+       #define vecs                    lr
+       #define x0                              r8
+       #define x1                              r10
+       #define x2                              r11
+       #define x3                              r12
+       #define zero                    r9
+
+       // this macro performs adler/sum2 update for 4 input bytes
+
+       .macro DO4
+       add             sum2, adler, lsl #2                             // sum2 += 4*adler;
+       ldr             x0,[buf]                                                // 4 bytes in 1 32-bit word
+       usada8  adler, x0, zero, adler                  // adler += sum(x0:x3)
+       ldrb    x0,[buf], #4                                    // x0
+       ldrb    x2,[buf,#-2]                                    // x2
+       ldrb    x1,[buf,#-3]                                    // x1
+       ldrb    x3,[buf,#-1]                                    // x3
+       add             sum2, x0, lsl #2                                // sum2 += 4*x0
+       add             x3, x3, x1, lsl #1                              // x3+2*x1
+       add             sum2, x2, lsl #1                                // sum2 += 2*x2
+       add             x3, x1                                                  // x3+3*x1
+       add             sum2, x3                                                // sum2 += x3+3*x1
+       .endm
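A scalar C restatement of what one DO4 step computes (a readability sketch, not part of the commit):

    /* Folds the four serial updates adler += x[i]; sum2 += adler into
     * closed form; usada8 supplies the x[0]+x[1]+x[2]+x[3] byte sum. */
    static void do4_ref(unsigned *adler, unsigned *sum2,
                        const unsigned char *x)
    {
        *sum2  += 4 * *adler + 4*x[0] + 3*x[1] + 2*x[2] + x[3];
        *adler += x[0] + x[1] + x[2] + x[3];
    }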
+
+       // the following macro cascades 4 DO4 into a adler/sum2 update for 16 bytes
+       .macro DO16
+       DO4                                                                             // adler/sum2 update for 4 input bytes
+       DO4                                                                             // adler/sum2 update for 4 input bytes
+       DO4                                                                             // adler/sum2 update for 4 input bytes
+       DO4                                                                             // adler/sum2 update for 4 input bytes
+       .endm
+
+       // the following macro performs adler sum2 modulo BASE
+       .macro  modulo_base
+       umull   x0,x1,adler,one_by_base                 // adler/BASE in Q47
+       umull   x2,x3,sum2,one_by_base                  // sum2/BASE in Q47
+       lsr             x1, #15                                                 // x1 >> 15 = floor(adler/BASE)
+       lsr             x3, #15                                                 // x3 >> 15 = floor(sum2/BASE)
+       mla             adler, x1, base, adler                  // adler %= base;
+       mla             sum2, x3, base, sum2                    // sum2 %= base;
+       .endm
+
+       adr             t, coeffs       
+       push    {r4-r6, r8-r11, lr}
+       ldmia   t, {one_by_base, base, nmax}    // load up coefficients
+
+       subs        len, nmax                   // pre-subtract len by NMAX
+       eor                     zero, zero                                      // a dummy zero register to use usada8 instruction
+    blt         len_lessthan_NMAX           // if (len < NMAX) skip the while loop     
+
+while_lengenmax_loop:                                          // do {
+    lsr         vecs, nmax, #4              // vecs = NMAX/16;
+
+len16_loop:                                                                    // do {
+
+       DO16
+
+       subs    vecs, #1                                                // vecs--;
+       bgt                     len16_loop                                      // } while (vecs>0);
+
+       modulo_base                                                             // reduce adler/sum2 modulo BASE
+
+       subs            len, nmax                                       // len -= NMAX
+       bge                     while_lengenmax_loop            // } while (len >= NMAX);
+
+len_lessthan_NMAX:
+       adds            len, nmax                                       // add NMAX back to len (undo the pre-subtract)
+
+       subs            len, #16                                        // pre-decrement len by 16
+       blt                     len_lessthan_16
+
+len16_loop2:
+
+       DO16
+
+       subs            len, #16
+       bge                     len16_loop2
+
+len_lessthan_16:
+       adds            len, #16                                        // post-increment len by 16
+       beq                     len_is_zero
+
+remaining_buf:
+       ldrb            x0, [buf], #1
+       subs            len, #1
+       add                     adler, x0
+       add                     sum2, adler
+       bgt                     remaining_buf
+
+len_is_zero:
+
+       modulo_base                                                     // reduce adler/sum2 modulo BASE
+
+       add             r0, adler, sum2, lsl #16                // to return sum2<<16 | adler 
+
+       pop             {r4-r6, r8-r11, pc}
+
+       .align 2
+coeffs:
+       .long   -2146992015             // 1/BASE in Q47 (bit pattern 0x80078071)
+       .long   -BASE                   // negated so mla can subtract q*BASE
+       .long   NMAX
+
+#else  // KERNEL_SUPPORT_NEON
+
+
+
+       #define adler   r0
+       #define sum2    r1
+       #define buf             r2
+       #define len             r3      
+       #define nmax    r4
+       #define vecs    lr                              // vecs = NMAX/16
+       #define n               r5
+
+       #define t               r12
+
+       #define sum2_coeff              q0
+       #define sum2_coeff0             d0
+       #define sum2_coeff1             d1
+       #define adler_coeff             q1
+       #define ones                    d2
+       #define x0_x15                  q2
+       #define x0_x7                   d4
+       #define x8_x15                  d5
+       #define adlersum2               d6
+       #define adler16                 d25
+
+#if defined _ARM_ARCH_7 
+
+       adr                     t, vec_table                            // address to vec_table[]
+       stmfd           sp!, {r4, r5, lr}
+
+       vld1.32         {q0-q1},[t,:128]!                       // loading up coefficients for adler/sum2 computation
+       vld1.32         {q15},[t,:128]!                         // for sum2 computation
+       ldr                     nmax, [t]                                       // NMAX
+
+       vmov            adlersum2, sum2, adler          // pack up adler/sum2 into a double register 
+
+       cmp                     len, nmax                                       // len vs NMAX
+       lsr                     vecs, nmax, #4                          // vecs = NMAX/16;
+       blt                     len_lessthan_NMAX                       // if (len < NMAX) skip the while loop          
+
+       sub                     len, nmax                                       // pre-decrement len by NMAX
+
+while_len_ge_NMAX_loop:                                        // while (len>=NMAX) {
+
+       mov                     n, vecs, lsr #1                 // n = NMAX/32; each iteration consumes 32 bytes (the odd 16 bytes are handled after the loop)
+
+do_loop:                                                                       // do {
+
+       vshll.u32       q12, adlersum2, #5                      // d25 = (0,32*adler) to be added into (adler,sum2)
+       vld1.32         {x0_x15},[buf,:128]!            // 16-byte input x0:x15
+       vmull.u8        q8, x0_x7, ones                         // 16-bit x0-x7
+       vld1.32         {q14}, [buf,:128]!                      // x16:x31
+       vmull.u8        q9, x8_x15, ones                        // 16-bit x8-x15
+       vadd.u32        adlersum2,adler16                       // sum2 += old adler*32;
+       vmull.u8        q12, d28, ones                          // 16-bit x16-x23
+       vmull.u8        q13, d29, ones                          // 16-bit x24-x31
+       vmull.u8        q10, d28, sum2_coeff0           // 16-bit x16*16, x17*15, ..., x23*9
+       vmull.u8        q11, d29, sum2_coeff1           // 16-bit x24*8, x25*7, ..., x31*1      
+       vadd.u16        q8, q8, q9                                      // q8 = (x0+x8):(x7+x15) 8 16-bit elements for adler
+       vmull.u8        q9, x0_x7, d30                          // 16-bit x0*32,...,x7*25
+       vmull.u8        q14, x8_x15, d31                        // 16-bit x8*24,...,x15*17
+       vadd.u16        q12, q12, q13                           // q12 = (x16+x24):(x23+x31) 8 16-bit elements for adler
+       vadd.u16        q10, q11                                        // 8 16-bit elements for sum2
+       vadd.u16        q8, q12                                         // 8 16-bit elements for adler
+       vadd.u16        q9, q14                                         // 8 16-bit elements for sum2 
+       vadd.u16        q10, q9                                         // 8 16-bit elements for sum2
+       vpaddl.u16      q8, q8                                          // 4 32-bit elements for adler
+       vpaddl.u16      q10, q10                                        // 4 32-bit elements for sum2
+       vpadd.u32       d16,d16,d17                                     // 2 32-bit elements for adler
+       vpadd.u32       d17,d20,d21                                     // 2 32-bit elements for sum2
+       subs            n, #1                                           //  --n 
+       vpadd.u32       d4,d17,d16                                      // s8 : 32-bit elements for sum2, s9 : 32-bit element for adler
+       vadd.u32        adlersum2,d4                            // update adler/sum2 with the new 16 bytes input
+
+       bgt                     do_loop                                         // } while (--n);
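+
+       // net effect of one do_loop iteration over 32 input bytes (sketch):
+       //      sum2  += 32*adler + 32*x0 + 31*x1 + ... + 1*x31;
+       //      adler += x0 + x1 + ... + x31;
+       // the 32*adler term comes from the vshll #5 above, the weighted byte
+       // sums from the 32..1 coefficients held in q15 and q0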
+
+       vshll.u32       q12, adlersum2, #4                      // d25 = (0,16*adler) to be added into (adler,sum2)
+
+       vld1.32         {x0_x15},[buf,:128]!            //      16-byte input
+
+       vmull.u8        q8, x0_x7, ones                         // 16-bit x0-x7
+       vmull.u8        q9, x8_x15, ones                        // 16-bit x8-x15
+       vmull.u8        q10, x0_x7, sum2_coeff0         // 16-bit x0*16, x1*15, ..., x7*9
+       vmull.u8        q11, x8_x15, sum2_coeff1        // 16-bit x8*8, x9*7, ..., x15*1        
+
+       vadd.u16        q8, q8, q9                                      // 8 16-bit elements for adler
+       vadd.u16        q10, q10, q11                           // 8 16-bit elements for sum2
+       vpaddl.u16      q8, q8                                          // 4 32-bit elements for adler
+       vpaddl.u16      q10, q10                                        // 4 32-bit elements for sum2
+       vpadd.u32       d16,d16,d17                                     // 2 32-bit elements for adler
+       vpadd.u32       d17,d20,d21                                     // 2 32-bit elements for sum2
+       vadd.u32        adlersum2,adler16                       // sum2 += old adler;
+       vpadd.u32       d4,d17,d16                                      // s8 : 32-bit elements for sum2, s9 : 32-bit element for adler
+       vadd.u32        adlersum2,d4                            // update adler/sum2 with the new 16 bytes input
+
+       // mod(adler,BASE); mod(sum2,BASE);
+       vmull.u32       q2,adlersum2,d3[1]                      // adler/BASE, sum2/BASE in Q47
+       vshr.u64        q2,q2,#47                                       // take the integer part
+       vpadd.u32       d4,d4,d5                                        // merge into a double word in d4
+       vmls.u32        adlersum2,d4,d3[0]                      // (adler,sum2) -= floor[(adler,sum2)/BASE]*BASE
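+       // this is the same Q47 reciprocal reduction as in the non-NEON path,
+       // applied to both 32-bit lanes (sum2, adler) of one double register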
+
+       subs            len, nmax                                       // len -= NMAX;
+       bge                     while_len_ge_NMAX_loop          // repeat while len >= NMAX
+
+       add                     len, nmax                                       // post-increment len by NMAX
+
+len_lessthan_NMAX:
+
+       cmp                     len, #0
+       beq                     len_is_zero                                     // if len==0, branch to skip the following
+
+
+       subs            len, #32                                        // pre-decrement len by 32
+       blt                     len_lessthan_32                         // if len < 32, branch to len_lessthan_32
+
+len32_loop:
+
+       vshll.u32       q12, adlersum2, #5                      // d25 = (0,32*adler) to be added into (adler,sum2)
+       vld1.32         {x0_x15},[buf,:128]!            // 16-byte input x0:x15
+       vmull.u8        q8, x0_x7, ones                         // 16-bit x0-x7
+       vld1.32         {q14}, [buf,:128]!                      // x16:x31
+       vmull.u8        q9, x8_x15, ones                        // 16-bit x8-x15
+       vadd.u32        adlersum2,adler16                       // sum2 += old adler*32;
+       vmull.u8        q12, d28, ones                          // 16-bit x16-x23
+       vmull.u8        q13, d29, ones                          // 16-bit x24-x31
+       vmull.u8        q10, d28, sum2_coeff0           // 16-bit x16*16, x17*15, ..., x23*9
+       vmull.u8        q11, d29, sum2_coeff1           // 16-bit x24*8, x25*7, ..., x31*1      
+       vadd.u16        q8, q8, q9                                      // q8 = (x0+x8):(x7+x15) 8 16-bit elements for adler
+       vmull.u8        q9, x0_x7, d30                          // 16-bit x0*32,...,x7*25
+       vmull.u8        q14, x8_x15, d31                        // 16-bit x8*24,...,x15*17
+       vadd.u16        q12, q12, q13                           // q12 = (x16+x24):(x23+x31) 8 16-bit elements for adler
+       vadd.u16        q10, q11                                        // 8 16-bit elements for sum2
+       vadd.u16        q8, q12                                         // 8 16-bit elements for adler
+       vadd.u16        q9, q14                                         // 8 16-bit elements for sum2 
+       vadd.u16        q10, q9                                         // 8 16-bit elements for sum2
+       vpaddl.u16      q8, q8                                          // 4 32-bit elements for adler
+       vpaddl.u16      q10, q10                                        // 4 32-bit elements for sum2
+       vpadd.u32       d16,d16,d17                                     // 2 32-bit elements for adler
+       vpadd.u32       d17,d20,d21                                     // 2 32-bit elements for sum2
+       subs            len, #32                                        // len -= 32; 
+       vpadd.u32       d4,d17,d16                                      // s8 : 32-bit elements for sum2, s9 : 32-bit element for adler
+       vadd.u32        adlersum2,d4                            // update adler/sum2 with the new 16 bytes input
+
+       bge                     len32_loop
+
+len_lessthan_32:
+
+       adds            len, #(32-16)                           // post-increment len by 32, then pre-decrement by 16
+       blt                     len_lessthan_16                         // if len < 16, branch to len_lessthan_16
+
+       vshll.u32       q12, adlersum2, #4                      // d25 = (0,16*adler) to be added into (adler,sum2)
+
+       vld1.32         {x0_x15},[buf,:128]!            //      16-byte input
+
+
+       vmull.u8        q8, x0_x7, ones                         // 16-bit x0-x7
+       vmull.u8        q9, x8_x15, ones                        // 16-bit x8-x15
+       vmull.u8        q10, x0_x7, sum2_coeff0         // 16-bit x0*16, x1*15, ..., x7*9
+       vmull.u8        q11, x8_x15, sum2_coeff1        // 16-bit x8*8, x9*7, ..., x15*1        
+
+       vadd.u16        q8, q8, q9                                      // 8 16-bit elements for adler
+       vadd.u16        q10, q10, q11                           // 8 16-bit elements for sum2
+       vpaddl.u16      q8, q8                                          // 4 32-bit elements for adler
+       vpaddl.u16      q10, q10                                        // 4 32-bit elements for sum2
+       vpadd.u32       d16,d16,d17                                     // 2 32-bit elements for adler
+       vpadd.u32       d17,d20,d21                                     // 2 32-bit elements for sum2
+       subs            len, #16                                        // decrement len by 16
+       vadd.u32        adlersum2,adler16                       // sum2 += old adler;
+       vpadd.u32       d4,d17,d16                                      // s8 : 32-bit elements for sum2, s9 : 32-bit element for adler
+       vadd.u32        adlersum2,d4                            // update adler/sum2 with the new 16 bytes input
+
+len_lessthan_16:
+       adds            len, #16                                        // post-increment len by 16
+       beq                     len_is_zero_internal            // if len==0, branch to len_is_zero_internal
+
+       // restore adler/sum2 into general registers for remaining (<16) bytes
+
+       vmov            sum2, adler, adlersum2
+remaining_len_loop:
+       ldrb            t, [buf], #1                            // *buf++;
+       subs            len, #1                                         // len--;
+       add                     adler,t                                         // adler += *buf
+       add                     sum2,adler                                      // sum2 += adler
+       bgt                     remaining_len_loop                      // loop while len>0
+
+       vmov            adlersum2, sum2, adler          // move to double register for modulo operation
+
+len_is_zero_internal:
+
+       // mod(adler,BASE); mod(sum2,BASE);
+
+       vmull.u32       q2,adlersum2,d3[1]                      // adler/BASE, sum2/BASE in Q47
+       vshr.u64        q2,q2,#47                                       // take the integer part
+       vpadd.u32       d4,d4,d5                                        // merge into a double word in d4
+       vmls.u32        adlersum2,d4,d3[0]                      // (adler,sum2) -= floor[(adler,sum2)/BASE]*BASE
+
+len_is_zero:
+
+       vmov        sum2, adler, adlersum2              // restore adler/sum2 from (s12=sum2, s13=adler)
+       add                     r0, adler, sum2, lsl #16        // to return adler | (sum2 << 16);
+       ldmfd       sp!, {r4, r5, pc}                   // restore registers and return 
+
+
+       // constants to be loaded into q registers
+       .align  4               // 16 byte aligned
+
+vec_table:
+
+       // coefficients for computing sum2
+       .long   0x0d0e0f10              // s0
+       .long   0x090a0b0c              // s1
+       .long   0x05060708              // s2
+       .long   0x01020304              // s3
+
+       // coefficients for computing adler
+       .long   0x01010101              // s4/d2
+       .long   0x01010101              // s5
+
+       .long   BASE                    // s6 : BASE 
+       .long   0x80078071              // s7 : 1/BASE in Q47
+
+       // q15 : d30.d31
+       .long   0x1d1e1f20              // d30[0]
+       .long   0x191a1b1c              // d30[1]
+       .long   0x15161718              // d31[0]
+       .long   0x11121314              // d31[1]
+
+NMAX_loc:
+       .long   NMAX                    // NMAX
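+
+       // note: each .long above packs four byte weights in little-endian order,
+       // e.g. 0x0d0e0f10 = bytes 16,15,14,13; so d0:d1 hold the sum2 weights
+       // 16..1 for a 16-byte block, d30:d31 hold 32..17 for the first half of
+       // a 32-byte block, and d2 (all ones) gathers the plain byte sum for adler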
+       
+#endif         // _ARM_ARCH_7
+
+#endif         //  (!KERNEL_SUPPORT_NEON) || (!defined _ARM_ARCH_7)
+
+#endif         // _ARM_ARCH_6
+
diff --git a/libkern/zlib/arm/inffastS.s b/libkern/zlib/arm/inffastS.s
new file mode 100644 (file)
index 0000000..9885579
--- /dev/null
@@ -0,0 +1,571 @@
+#include <arm/arch.h>
+
+// the following assembly code is hard-wired to the case where POSTINC is not defined
+
+#if 0                  // #ifdef POSTINC
+#  define OFF 0
+#  define PUP(a) *(a)++
+#else
+#  define OFF 1
+#  define PUP(a) *++(a)
+#endif
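+
+// with OFF == 1 every pointer is kept one byte behind its next element, so
+// PUP(p) == *++(p) first advances and then dereferences; this is why the code
+// below initializes in = strm->next_in - OFF and out = strm->next_out - OFF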
+
+// the code uses r9 and therefore does not conform to the register-usage conventions for armv5 and below;
+// it can only be used on armv6 and above
+
+#if defined _ARM_ARCH_6 
+
+       .cstring
+       .align 2
+LC0:
+       .ascii "invalid distance too far back\0"
+       .align 2
+LC1:
+       .ascii "invalid distance code\0"
+       .align 2
+LC2:
+       .ascii "invalid literal/length code\0"
+
+       // renaming the register and stack memory use
+
+       #define         out                     r0
+       #define         strm            r10
+       #define         state           r5
+       #define         in                      r11
+       #define         write           r9
+       #define         distcode        r8
+       #define         bits            lr
+       #define         hold            r4
+
+       // stack memory allocation
+
+       #define         window_loc      [sp,#0]
+       #define         last_loc        [sp,#4]
+       #define         beg_loc         [sp,#8]
+       #define         end_loc         [sp,#12]
+       #define         wsize_loc       [sp,#16]
+       #define         whave_loc       [sp,#20]
+       #define         windowm1_loc    [sp,#28]
+       #define         lmask_loc       [sp,#32]
+       #define         dmask_loc       [sp,#36]
+       #define         dist_loc        [sp,#48]
+
+       #define         local_size      52
+
+       // the following defines the field offsets into the inflate_state structure    (see inflate.h)
+
+       #define         state_mode              [state, #0]
+       #define         state_last              [state, #4]
+       #define         state_wrap              [state, #8]
+       #define         state_havedict  [state, #12]
+       #define         state_flags             [state, #16]
+       #define         state_dmax              [state, #20]
+       #define         state_wbits             [state, #36]
+       #define         state_wsize             [state, #40]
+       #define         state_whave             [state, #44]
+       #define         state_write             [state, #48]
+       #define         state_window    [state, #52]
+       #define         state_hold              [state, #56]
+       #define         state_bits              [state, #60]
+       #define         state_lencode   [state, #76]
+       #define         state_distcode  [state, #80]
+       #define         state_lenbits   [state, #84]
+       #define         state_distbits  [state, #88]
+
+
+// void inflate_fast(z_streamp strm, unsigned start)
+// input :     
+//                     r0 = strm, (move to r10) 
+//                     r1 = start      
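+//
+// for reference, a rough C sketch of the loop this routine implements
+// (mirroring zlib's inffast.c):
+//
+//     do {
+//         if (bits < 15) { hold += PUP(in) << bits; bits += 8; /* twice */ }
+//         this = lcode[hold & lmask];
+//       dolen:
+//         hold >>= this.bits; bits -= this.bits;
+//         if (this.op == 0)             PUP(out) = (unsigned char)(this.val);
+//         else if (this.op & 16)        { /* decode length, then distance, then copy */ }
+//         else if ((this.op & 64) == 0) { this = lcode[this.val + (hold & ((1U << this.op) - 1))]; goto dolen; }
+//         else                          { /* end-of-block or error */ }
+//     } while (in < last && out < end);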
+
+       .text
+       .align 2
+       .globl _inflate_fast
+_inflate_fast:
+
+       stmfd   sp!, {r4-r6,r8-r11,lr}
+       sub             sp, sp, #local_size
+
+#if defined(_ARM_ARCH_5)
+       ldrd    r2,r3,[r0, #0]                  // r2 = strm->next_in, r3 = strm->avail_in
+#else
+       ldmia   r0, {r2-r3}
+#endif
+
+       sub             in, r2, #OFF                    // in = strm->next_in - OFF; 
+       sub             r2, #(OFF+5)                    // next_in -= (OFF+5);
+       ldr             state, [r0, #28]                // state = (struct inflate_state FAR *)strm->state;
+       add             r3, r3, r2                              // last = next_in - OFF + (avail_in - 5);       next_in already updated
+       mov             strm, r0
+       str             r3, last_loc                    // store last to release r3
+
+       ldr             r3, [r0, #12]                   // next_out
+       ldr             r2, [strm, #16]                 // avail_out
+
+       sub             out, r3, #OFF                   // out = strm->next_out - OFF; r0 is used as out from this point on
+
+       sub             r3, r3, #256                    // next_out - 256
+       rsb             r1, r2, r1                              // start - avail_out
+       sub             r3, r3, #(1+OFF)                // next_out-OFF-257 
+       add             r3, r3, r2                              // r3 = end = avail_out + (next_out-OFF) - 257 = avail_out + out - 257
+       rsb             r2, r1, out                             // r2 = beg = out - (start - avail_out);
+#if defined(_ARM_ARCH_5)
+       strd    r2,r3, beg_loc                  // store beg/end
+       ldrd    r2,r3, state_wsize              // wsize/whave
+       strd    r2,r3, wsize_loc                // store wsize/whave
+       //ldrd  r6,hold, state_window   // window/hold, hold use r7
+       ldr             r6, state_window                // state->window
+       ldr             hold, state_hold                // state->hold
+       nop
+#else
+       // for architecture < armv5, ldrd/strd is not available
+       str             r2, beg_loc                             // store beg
+       str             r3, end_loc                             // store end
+       ldr             r2, state_wsize                 // state->wsize
+       ldr             r3, state_whave                 // state->whave
+       str             r2, wsize_loc                   // store wsize
+       str             r3, whave_loc                   // store whave
+       ldr             r6, state_window                // state->window
+       ldr             hold, state_hold                // state->hold
+#endif
+
+       ldr             ip, state_lencode               // lencode
+       mov             r3, #1                                  // used to derive lmask and dmask
+       ldr             write, state_write              // write (r9 from this point on) : window write index
+       nop
+       str             ip, [sp, #40]                   // save lencode
+       sub             ip, r6, #1                              // window-1
+       str             r6, window_loc                  // store window
+       str             ip, windowm1_loc                // store window-1
+       ldr             r2, state_lenbits               // lenbits
+       ldr             bits, state_bits                // bits, use lr from this point on
+       ldr             distcode, state_distcode// distcode, use r8
+       mov             r2, r3, asl r2                  // (1<<lenbits)
+       ldr             r12, state_distbits             // distbits
+       sub             r2, r2, #1                              // lmask = (1U << state->lenbits) - 1;
+       mov             r3, r3, asl r12                 // (1U << state->distbits)
+       sub             r3, r3, #1                              // dmask = (1U << state->distbits) - 1;
+
+#if defined(_ARM_ARCH_5)
+       strd    r2, r3, lmask_loc               // store lmask/dmask
+#else
+       str             r2, lmask_loc                   // lmask
+       str             r3, dmask_loc                   // dmask
+#endif
+
+       // start the do loop decoding literals and length/distances 
+       // until end-of-block or not enough input data or output space
+
+do_loop:
+       cmp             bits, #15                               // bits vs 15
+       ldr             r1, lmask_loc                   // lmask
+       bge             bitsge15                                // if bits >= 15, skip loading new 16 bits      
+
+       // this is a shortcut that relies on the processor reading data in little-endian mode
+       ldrh    r3, [in,#1]                                     // read 2 bytes 
+       add             in, #2                                          // in pointer += 2
+       add             hold, hold, r3, asl bits        // deposit the new 2 bytes into hold
+       add             bits, #16                                       // bits count += 16
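+       // equivalent C (sketch): the halfword load merges two byte reads,
+       //      hold += (unsigned long)(PUP(in)) << bits;
+       //      hold += (unsigned long)(PUP(in)) << (bits + 8);
+       //      bits += 16;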
+
+bitsge15:
+       ldr             ip, [sp, #40]                   // restore lencode
+       and             r3, hold, r1                            // r3 = hold & lmask
+       b               dolen
+
+op_not_zero:
+
+       tst     r2, #16                                                 // if (op&16)
+       bne     length_base                                             //              branch to length_base
+
+       tst     r2, #64                                                 // else if (op&64) 
+       bne     end_of_block                                    //              branch to end_of_block processing 
+
+       // 2nd-level length code, this is the part where if ((op & 64) == 0) { ... }
+
+       // this.val + (hold & ((1U << op) - 1)); 
+       // r3 = r1 + hold & ((1<<r2)-1);
+
+       rsb             r12, r2, #32                            // r12 = (32-op)
+       ror     r3, hold, r2                            // rotate the op least significant bits of hold to MSB
+       add             r3, r1, r3, lsr r12                     // r3 = r1 + (op LSBs in hold) = r1 + hold & ((1<<r2)-1); 
+
+       ldr             ip, [sp, #40]                   // restore lencode
+
+dolen:
+
+       // code -> 8-bit code, 8-bit bits, 16-bit val
+       ldrb    r2, [ip,r3,asl #2]              // op = (unsigned)(this.op);
+       add             r3, ip, r3, asl #2              // r3 = this
+       ldrb    ip, [r3, #1]                            // ip = this.bits
+       ldrh    r1, [r3, #2]                            // r1 = this.value
+       cmp             r2, #0                                          // op == 0 ?
+
+       mov             hold, hold, lsr ip                      // hold >>= this.bits
+       rsb             bits, ip, bits                          // bits -= this.bits
+       bne             op_not_zero                                     // branch to op_not_zero if this.op != 0
+
+       strb    r1, [out, #1]!                          // PUP(out) = (unsigned char)(this.val);
+
+do_loop_while:
+       ldr             r1, last_loc                            // last
+       ldr             r2, end_loc                                     // end
+       cmp             in, r1                                          // compare in vs last 
+       cmpcc   out, r2                                         // if in < last, compare out vs end
+       bcc             do_loop                                         // if (in < last && out < end) go back to do_loop
+
+update_state_and_return:
+
+       sub             r2, in, bits, lsr #3            // r2 = in - (bits>>3)
+
+       add             r3, r2, #OFF                            // r3 = (in - (bits>>3)) + OFF
+       str             r3, [strm, #0]                          // strm->next_in = in + OFF;
+
+       add             r3, out, #OFF                           // out + OFF
+       str             r3, [strm, #12]                         // strm->next_out = out + OFF;
+
+       ldr             r3, last_loc                            // r3 = last
+       ldr             ip, end_loc                                     // ip = end
+
+       cmp             r3, r2                                          // compare last vs in
+       addhi   r3, r3, #5                                      // if last > in, last +=5
+       movls   r6, r3                                          // o.w., r6 = last
+       rsbls   r3, r6, r2                                      //       r3 = in-last
+       rsbhi   r3, r2, r3                                      // r3 = (last+5) - in
+       rsbls   r3, r3, #5                                      // r3 = 5 - (in-last);
+       cmp             out, ip                                         // compare out vs end
+       str             r3, [strm, #4]                          // strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
+       movcs   r2, ip                                          // if out>=end, r2=end
+       addcc   r3, ip, #256                            // if out<end, r3 = end+256
+       rsbcs   r3, r2, out                                     // if out>=end, r3 = out-end
+       addcc   r3, r3, #1                                      // if out<end, r3 = end+257
+       rsbcs   r3, r3, #256                            // if out>=end, r3 = 256-(out-end)
+       and             bits, #7                                        // this is equivalent to bits -= (bits>>3) << 3;
+       rsbcc   r3, out, r3                                     // if out<end, r3 = 257+end-out
+       addcs   r3, r3, #1                                      // if out>=end, r3 = 257 + (end-out)
+       str             r3, [strm, #16]                         // strm->avail_out = (unsigned)(out < end ?  257 + (end - out) : 257 - (out - end)); 
+
+       // hold &= (1U << bits) - 1;
+
+       rsb             ip, bits, #32                           // 32-bits
+    ror        hold, hold, bits                        // this is equivalent to hold<<(32-bits)
+    lsr        hold, hold, ip                          // logical shift right by (32-bits), hold now only keeps the bits LSBs
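+       // a rotate right by `bits` followed by a logical shift right by (32-bits)
+       // keeps exactly the low `bits` bits of hold, with no temporary mask needed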
+
+       str             bits, state_bits                        // state->bits = bits;
+       str             hold, state_hold                        // state->hold = hold;
+
+       add             sp, #local_size                         // pop out stack memory
+       ldmfd   sp!,{r4-r6,r8-r11,pc}                           // restore registers and return
+
+length_base:                                                   // r2=op, r1=lmask
+       ands    r2, r2, #15                                     // op&=15;
+       mov             r6, r1                                          // len = (unsigned) this.val;
+       beq             op_is_zero                                      // if op==0, branch to op_is_zero
+       cmp             r2, bits                                        // op vs bits
+       ldrhib  r3, [in, #1]!                           // if (op>bits) r3 = (PUP(in));
+       addhi   hold, hold, r3, asl bits        // if (op>bits) hold += (unsigned long)(PUP(in)) << bits;
+
+       rsb             ip, r2, #32                                     // 32-op
+    ror        r3, hold, r2                            // (hold<<(32-op))
+       add             r6, r1, r3, lsr ip                      // len += (unsigned)hold & ((1U << op) - 1);
+
+       addhi   bits, bits, #8                          // if (op>bits) bits += 8;
+
+       mov             hold, hold, lsr r2                      // hold >>= op;
+       rsb             bits, r2, bits                          // bits -= op;
+
+op_is_zero:
+       cmp             bits, #14
+       ldrh    r3,[in,#1]                  // if (bits < 15) { read 2 bytes at once (two PUP(in)); loaded unconditionally for better performance
+    addls   in, #2                      //     in += 2;
+    addls   hold, hold, r3, asl bits    //     hold += (unsigned long)(2 bytes) << bits;
+    addls   bits, #16                   //     bits += 16; } (two bits += 8 steps)
+
+dodist:
+
+       ldr             r2, dmask_loc                           // r2 = dmask
+       and             r3, hold, r2                            // r3 = hold & dmask
+       mov             r2, r3, asl #2
+       add             r3, r2, distcode                        // &dcode[hold&dmask];
+       ldrb    ip, [r2, distcode]                      // op
+       ldrh    r1, [r3, #2]                            // dist = (unsigned)(this.val);
+       tst             ip, #16                                         // op vs 16
+       ldrb    r3, [r3, #1]                            // this.bits
+       mov             hold, hold, lsr r3                      // hold >>= this.bits;
+       rsb             bits, r3, bits                          // bits -= this.bits;
+       bne             distance_base                           // if (op&16) { distance base processing  }     
+       tst             ip, #64                                         // 
+       beq             second_distance_code            // else if ((op&64)==0) branch to 2nd level distance code
+
+       b               invalid_distance_code
+
+check_2nd_level_distance_code:
+
+       tst             r2, #64                                         // check for else if ((op & 64) == 0) for 2nd level distance code
+       bne             invalid_distance_code
+
+second_distance_code:
+
+       rsb             r2, ip, #32                                     // 32-op
+       ror             r3, hold, ip                            // hold<<(32-op)
+       add             r3, r1, r3, lsr r2                      // this.val + (hold & ((1U << op) - 1))
+
+       mov             r2, r3, asl #2
+       add             r3, r2, distcode                        // this = dcode[this.val + (hold & ((1U << op) - 1))];
+       ldrb    r2, [r2, distcode]                      // this.op
+       ldrh    r1, [r3, #2]                            // this.val
+
+       tst             r2, #16                                         // op&16
+       ldrb    r3, [r3, #1]                            // this.bits
+       mov             ip, r2                                          // op
+       mov             hold, hold, lsr r3                      // hold >> = this.bits
+       rsb             bits, r3, bits                          // bits -= this.bits
+       beq             check_2nd_level_distance_code
+
+distance_base:                 // this is invoked from if ((op&16)!=0)
+
+       and             r2, ip, #15                                     // op &= 15;
+       cmp             r2, bits                                        // op vs bits
+       ldrhib  r3, [in, #1]!                           // if (op > bits) (PUP(in))
+       addhi   hold, hold, r3, asl bits        //              hold += (unsigned long)(PUP(in)) << bits;
+       addhi   bits, bits, #8                          //              bits += 8;      
+       cmphi   r2, bits                                        //              internal recheck of (bits < op)
+       ldrhib  r3, [in, #1]!                           //              if (op > bits) (PUP(in))
+       addhi   hold, hold, r3, asl bits        //                      hold += (unsigned long)(PUP(in)) << bits;
+
+       rsb             ip, r2, #32                                     // (32-op)
+       ror             r3, hold, r2                            // hold<<(32-op)
+       add             r3, r1, r3, lsr ip                      // dist += (unsigned)hold & ((1U << op) - 1);
+
+       ldr             ip, beg_loc                                     // beg
+
+#ifdef INFLATE_STRICT
+       ldr     r1, state_dmax                          // r1 = dmax
+#endif
+
+       str             r3, dist_loc                            // save dist
+
+#ifdef INFLATE_STRICT
+       cmp             r3, r1                                                          // dist vs dmax 
+       bgt             invalid_distance_too_far_back           // if dist > dmax, set up msg/mode = bad and break
+#endif
+
+       ldr             r1, dist_loc                            // dist
+       rsb             r3, ip, out                                     // (out - beg);
+       addhi   bits, bits, #8                          // this is the internal bits += 8 from above
+
+       cmp             r1, r3                                          // dist vs (out - beg) 
+
+       mov             hold, hold, lsr r2                      // hold >>= op ;
+       rsb             bits, r2, bits                          // bits -= op;
+       rsbls   r2, r1, out                                     // if (dist<=op) r2 = from = out-dist
+       bls             copy_direct_from_output         // if (dist<=op) branch to copy_direct_from_output
+
+       ldr             r2, whave_loc                                   // whave
+       rsb             r1, r3, r1                                              // op = dist-op
+       cmp             r2, r1                                                  // whave vs op
+       nop                                                                             // pad dummy for better performance
+       bcc             invalid_distance_too_far_back   // if whave < op,  message invalid distance too far back, and break
+
+       cmp             write, #0                                               // write
+       bne             non_very_common_case                    // if (write ==0) non_very_common_case
+
+       // the following : if (write == 0) { /* very common case */ }
+       nop                                                                             // pad dummy for better performance
+       ldr             ip, wsize_loc                                   // wsize
+       cmp             r6, r1                                                  // len vs op 
+       rsb             r3, r1, ip                                              // wsize - op
+       ldr             ip, windowm1_loc                                // window - 1
+       add             r2, ip, r3                                              // from = window - 1 + wsize - op : setup for using PUP(from)
+       movhi   r3, r1                                                  // if len > op, r3 = op
+       movhi   r1, out                                                 // if len > op, r1 = out
+       bhi             some_from_window                                // if (len > op), branch to some_from_window
+
+finish_copy:
+
+       //      while (len > 2) { 
+       //              PUP(out) = PUP(from); 
+       //              PUP(out) = PUP(from); 
+       //              PUP(out) = PUP(from); 
+       //              len -= 3; 
+       //      } 
+       //      if (len) { 
+       //              PUP(out) = PUP(from); 
+       //              if (len > 1) 
+       //              PUP(out) = PUP(from); 
+       //      }
+
+       cmp             r6, #2                                                  // len > 2 ?
+       movls   r1, r6                                                  // if (len<=2) r1 = len
+       bls             lenle2                                                  // if (len<=2) branch to lenle2
+       mov             r1, r6
+fcopy_per3bytes:
+       ldrb    r3, [r2, #1]                                    // 1st PUP(from)
+       sub             r1, r1, #3                                              // len-=3
+       cmp             r1, #2                                                  // len > 2 ?
+       strb    r3, [out, #1]                                   // 1st PUP(out) = PUP(from);
+       ldrb    r3, [r2, #2]                                    // 2nd PUP(from)
+       add             r2, r2, #3                                              // from+=3
+       strb    r3, [out, #2]                                   // 2nd PUP(out) = PUP(from);
+       ldrb    r3, [r2, #0]                                    // 3rd PUP(from)
+       add             out, out, #3                                    // out+=3
+       strb    r3, [out, #0]                                   // 3rd PUP(out) = PUP(from);
+       bgt             fcopy_per3bytes                                 // while (len>3) back to loop head      
+lenle2:
+       cmp             r1, #0                                                  // len
+       beq             do_loop_while                                   // back to while loop head if len==0    
+       ldrb    r3, [r2, #1]                                    // PUP(from)
+       cmp             r1, #2                                                  // check whether len==2
+       strb    r3, [out, #1]!                                  // PUP(out) = PUP(from);
+       bne             do_loop_while                                   // back to while loop head if len==1 
+       ldrb    r3, [r2, #2]                                    // 2nd PUP(from)
+       strb    r3, [out, #1]!                                  // 2nd PUP(out) = PUP(from);
+       b               do_loop_while                                   // back to while loop head
+
+end_of_block:
+       tst             r2, #32                                         // if (op&32)
+       movne   r3, #11                                         //   TYPE (inflate_mode value 11)
+       strne   r3, state_mode                          // state->mode = TYPE
+       bne             update_state_and_return         // break the do loop and branch to get ready to return
+       ldr             r3, messages                            // "invalid literal/length code" message
+L75:
+       add             r3, pc, r3
+       str             r3, [strm, #24]                         // strm->msg = (char *)"invalid literal/length code";
+       mov             r3, #27                                         // BAD (inflate_mode value 27)
+       str             r3, state_mode                          // state->mode = BAD;
+       b               update_state_and_return         // break the do loop and branch to get ready to return
+
+//Read_2_bytes:
+//     ldrh    r3,[in,#1]                                      // 2 (PUP(in)) together
+//     add             in, #2                                          // 2 in++
+//     add             hold, hold, r3, asl bits        // twice hold += (unsigned long)(PUP(in)) << bits;
+//     add             bits, #16                                       // 2 bits += 8;
+//     b               dodist                                          // branch to dodist 
+       nop                                                                     // a pad dummy instruction to give better performance
+
+copy_direct_from_output:                               // r2 = from = out - dist ;
+
+                                                                               // do {
+       ldrb    r3, [r2, #1]                            //      1st PUP(from)
+       sub             r6, r6, #3                                      //      len-=3
+       cmp             r6, #2                                          //      len vs 2
+       strb    r3, [out, #1]                           //      1st PUP(out) = PUP(from);
+       ldrb    r3, [r2, #2]                            //      2nd PUP(from)
+       add             r2, r2, #3                                      //      update from+=3
+       strb    r3, [out, #2]                           //      2nd PUP(out) = PUP(from);
+       ldrb    r3, [r2, #0]                            //      3rd PUP(from);
+       add             out, out, #3                            //      update out+=3
+       strb    r3, [out, #0]                           //      3rd PUP(out) = PUP(from);
+       bhi             copy_direct_from_output         // while (len>2);
+
+       // len in r6 can now be 0 1 or 2
+
+       subs    r6,#1                                           // len--;
+    ldrb    r3, [r2, #1]                               // PUP(from)
+    blt     do_loop_while                              // if len<0 back to while loop head
+    strb    r3, [out, #1]!                             // PUP(out) = PUP(from);
+    subs    r6, #1                                             // len--;
+    ldrb    r3, [r2, #2]                               // 2nd PUP(from)
+    blt     do_loop_while                              // if len<0 back to while loop head
+    strb    r3, [out, #1]!                             // 2nd PUP(out) = PUP(from);
+    b       do_loop_while                              // back to while loop head
+
+
+invalid_distance_code:
+       ldr             r3, messages+4                          // "invalid distance code"
+L72:
+       add             r3, pc, r3
+       str             r3, [strm, #24]                         // strm->msg = (char *)"invalid distance code";
+       mov             r3, #27                                         // BAD
+       str             r3, state_mode                          // state->mode = BAD;
+       b               update_state_and_return         // break, restore registers, and return
+
+
+some_from_window:
+       add             out, r3, out                            // out += op
+       rsb             r6, r3, r6                                      // len -= op 
+some_from_window_loop:                                 // do {
+       ldrb    ip, [r2, #1]!                           //              PUP(from);
+       subs    r3, r3, #1                                      //              --op    
+       strb    ip, [r1, #1]!                           //              PUP(out) = PUP(from);
+       bne             some_from_window_loop           // } while(op);
+       ldr             r3, dist_loc                            // dist
+       rsb             r2, r3, out                                     // from = out - dist;
+       b               finish_copy
+
+non_very_common_case:
+       cmp             write, r1                                       // write vs op
+       nop                                                                     // pad dummy for better performance
+       bcs             contiguous_in_window            // if (write >= op) branch to contiguous_in_window
+
+       /* wrap around window */
+
+       ldr             r2, wsize_loc                           // wsize
+       ldr             ip, windowm1_loc                        // window-1
+       add             r3, write, r2                           // r3 = wsize+write
+       rsb             r3, r1, r3                                      // r3 = wsize+write-op
+       add             r2, ip, r3                                      // r2 = from = wsize+write-op+window-1;
+       rsb             r1, write, r1                           // op -= write;
+
+       cmp             r6, r1                                          // len vs op
+       bls             finish_copy                                     // if (len <= op) branch to finish_copy
+       rsb             r6, r1, r6                                      // len -= op
+waw_loop:                                                              // do {
+       ldrb    r3, [r2, #1]!                           //      PUP(from)
+       subs    r1, r1, #1                                      //  --op; 
+       strb    r3, [out, #1]!                          //  PUP(out) = PUP(from);
+       bne             waw_loop                                        // } while (op); 
+
+       cmp             write, r6                                       // write vs len
+       ldrcs   r2, windowm1_loc                        // if (write>=len) r2 = from = window-1;
+       bcs             finish_copy                                     // if (write>=len) branch to finish_copy
+
+       // some from start of window
+
+       mov             r1, write                               // op = write
+       sub             r6, write                               // len -= op
+       sub             ip, out
+       add             ip, #1                                  // out+ip -> from
+sow_loop:                                                      // do { 
+       ldrb    r3,[out, ip]                    //      PUP(from)
+       subs    r1, #1                                  //  --op;
+       strb    r3, [out,#1]!                   //  PUP(out) = PUP(from);
+       bne             sow_loop                                // } while (op);
+
+       ldr             r2, dist_loc                    // dist
+       sub             r6, r6, write                   // len -= write 
+       rsb             r2, r2, out                             // r2 = from = out-dist
+       b               finish_copy                             // continue to finish_copy
+
+
+contiguous_in_window:
+       ldr             ip, windowm1_loc                // window-1
+       cmp             r6, r1                                  // len vs op
+       rsb             r3, r1, write                   // r3 = write-op
+       add             r2, ip, r3                              // r2 = from = window+write-op-1
+       bls             finish_copy                             // if (len <= op) branch to finish_copy
+       rsb             r6, r1, r6                              // len -= op 
+       ldr             r3, dist_loc                    // dist
+ciw_loop:
+       ldrb    ip, [r2, #1]!                   // PUP(from)
+       subs    r1, r1, #1                              // op--
+       strb    ip, [out, #1]!                  // PUP(out) = PUP(from);
+       bne             ciw_loop                                // while (--op); 
+       rsb             r2, r3, out                             // from = out - dist;
+       b               finish_copy
+
+invalid_distance_too_far_back:
+       ldr             r3, messages+8                                  // "invalid distance too far back"
+L42:
+       add             r3, pc, r3
+       str             r3, [strm, #24]                                 // strm->msg = (char *)"invalid distance too far back";
+       mov             r3, #27                                         // BAD
+       str             r3, state_mode                                  // state->mode = BAD;
+       b               update_state_and_return                 // break, restore registers, and return
+
+       .align 2
+messages:
+       .long   LC2-8-(L75)
+       .long   LC1-8-(L72)
+       .long   LC0-8-(L42)
+
+#endif // defined _ARM_ARCH_6
index 82d2795c0d9ebd8f432756ea5e012728bbd13a28..54f0ee81505d6dac8468eeda3772c08c80cdc8e8 100644 (file)
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
+
+#if defined _ARM_ARCH_6
+
+       // dummy definition; for armv6 and above, the code is compiled from inffastS.s instead
+       typedef char DummyDefinition;
+
+#else  // architecture
+
 #include "zutil.h"
 #include "inftrees.h"
 #include "inflate.h"
@@ -343,3 +351,5 @@ unsigned start;         /* inflate()'s starting value for strm->avail_out */
  */
 
 #endif /* !ASMINF */
+
+#endif // architecture
index 0366b6215a9786482964624710c221bb358dfc3e..60c9bee2f785ca679f34f6bb94da0b7ffdffee92 100644 (file)
@@ -250,7 +250,7 @@ ifeq (-arch armv6,$(ARCH_FLAGS_ARM))
 CFLAGS_ARM             += -mthumb
 endif
 ifeq (-arch armv5,$(ARCH_FLAGS_ARM))
-CFLAGS_ARM             += -mthumb
+CFLAGS_ARM             += -mno-thumb
 endif
 ifeq (-arch xscale,$(ARCH_FLAGS_ARM))
 CFLAGS_ARM             += -mthumb
@@ -394,7 +394,8 @@ export LDFLAGS_KERNEL_ARM     = \
        -Wl,-new_linker \
        -Wl,-pagezero_size,0x0 \
        -Wl,-segaddr,__HIB,0xC0000000 \
-       -Wl,-image_base,0xC0008000
+       -Wl,-image_base,0xC0008000 \
+       -Wl,-exported_symbols_list,$(TARGET)/kernel-kpi.exp
 
 
 export LDFLAGS_KERNEL  = $(LDFLAGS_KERNEL_GEN) \
index 618a7849f4aa60491ee1bbecd5d0cfce3805e133..3ba71308348ca474b9e39ee972677bdf311cd9ad 100644 (file)
@@ -570,6 +570,7 @@ do_build_mach_kernel: $(TARGET)/kgmacros $(TARGET)/mach_kernel
 
 $(TARGET)/mach_kernel: $(addprefix $(TARGET)/,$(foreach component,$(COMPONENT_LIST), $(addprefix $(component)/$(firstword $($(addsuffix _KERNEL_CONFIG, $(shell printf $(component) | tr a-z A-Z))) $(KERNEL_CONFIG))/, $(addsuffix .o, $(component))))) lastkernelconstructor.o
        $(_v)${MAKE} version.o
+       $(_v)${MAKE} build_mach_kernel_exports
        @echo LD mach_kernel.sys
        $(_v)$(CAT) $(addprefix $(TARGET)/,$(foreach component,$(COMPONENT_LIST), $(addprefix $(component)/$(firstword $($(addsuffix _KERNEL_CONFIG, $(shell printf $(component) | tr a-z A-Z))) $(KERNEL_CONFIG))/, $(addsuffix .o, $(component))))) > mach_kernel.filelist
        $(_v)$(LD) $(LDFLAGS_KERNEL) -filelist mach_kernel.filelist version.o lastkernelconstructor.o `if [ -e $(STATIC_KMODS) ]; then echo $(STATIC_KMODS); fi` \
@@ -606,6 +607,14 @@ lastkernelconstructor.o: $(SRCROOT)/libsa/lastkernelconstructor.c
 $(TARGET)/kgmacros: $(SRCROOT)/kgmacros
        $(_v)$(INSTALL) $(INSTALL_FLAGS) $? $@
 
+.PHONY: build_mach_kernel_exports
+build_mach_kernel_exports:
+       $(_v)${MAKE}                                    \
+               MAKEFILES=${SOURCE}/config/Makefile     \
+               SOURCE=${SOURCE}/config                 \
+               TARGET=$${TARGET}                       \
+       build_mach_kernel_exports;
+
 # Special rules to install machine configuration variants
 
 $(DSTROOT)$(INSTALL_FILE_DIR)mach.$(KERNEL_CONFIG_LC).$(MACHINE_CONFIG_LC): $(TARGET)/mach_kernel force_file_install
index 76e39eb65f95e286fb506448b8a06f5f8d0094ea..cadb1a9761cbef002b7f81e9d557a2fa98efa968 100644 (file)
@@ -230,6 +230,10 @@ options   CONFIG_EMBEDDED                       # <config_embedded>
 #
 options   CONFIG_ENFORCE_SIGNED_CODE           # <config_embedded>
 
+# support dynamic signing of code
+#
+options                CONFIG_DYNAMIC_CODE_SIGNING     # <dynamic_codesigning>
+
 # vc_progress_white - make the progress gear white instead of black
 options          CONFIG_VC_PROGRESS_WHITE              # <vc_progress_white>
 
index 29fa14890254216eb551ba9362fb28f2c2ecb0ae..ca0acb6d12352b821a69f08bf11b618247c76d61 100644 (file)
@@ -35,6 +35,7 @@
 #include <sys/errno.h>
 #include <string.h>
 #include <machine/machlimits.h>
+#include <pexpert/pexpert.h>
 
 extern struct vc_info vinfo;
 extern boolean_t panicDialogDesired;
@@ -51,7 +52,6 @@ static int panic_dialog_verify( const struct panicimage * data, unsigned int siz
 static int pixels_needed_to_blit_digit( int digit );
 static void blit_digit( int digit );
 static const char * strnstr(const char * s, const char * find, size_t slen);
-void dim_screen(void);
 static void panic_blit_rect(unsigned int x, unsigned int y, unsigned int width,
                            unsigned int height, int transparent,
                            const unsigned char * dataPtr);
@@ -839,40 +839,6 @@ decode_rle(const unsigned char *dataPtr, unsigned int *quantity,
 }
 
 
-void 
-dim_screen(void)
-{
-       unsigned int *p, *endp, *row;
-       int      col, rowline, rowlongs;
-       register unsigned int mask;
-
-       if(!vinfo.v_depth)
-               return;
-
-       if ( vinfo.v_depth == 32 )
-               mask = 0x007F7F7F;
-       else if ( vinfo.v_depth == 30 )
-               mask = (0x1ff<<20) | (0x1ff<<10) | 0x1ff;
-       else if ( vinfo.v_depth == 16 )
-               mask = 0x3DEF3DEF;
-       else
-               return;
-
-       rowline = (int)(vinfo.v_rowscanbytes / 4);
-       rowlongs = (int)(vinfo.v_rowbytes / 4);
-
-       p = (unsigned int*) vinfo.v_baseaddr;
-       endp = p + (rowlongs * vinfo.v_height);
-
-       for (row = p ; row < endp ; row += rowlongs) {
-               for (p = &row[0], col = 0; col < rowline; col++) {
-                       *p = (*p >> 1) & mask;
-                       ++p;
-               }
-       }
-}
-
-
 /* From user mode Libc - this ought to be in a library */
 static const char *
 strnstr(const char * s, const char * find, size_t slen)
index 49dc6da916fa6fad146f5bb5bad846014d0d68e4..8c0dc3bf25e7a85d071638734ddbc832a76a8c91 100644 (file)
@@ -2506,6 +2506,39 @@ initialize_screen(PE_Video * boot_vinfo, unsigned int op)
 #endif /* GRATEFULDEBUGGER */
 }
 
+void 
+dim_screen(void)
+{
+       unsigned int *p, *endp, *row;
+       int      col, rowline, rowlongs;
+       register unsigned int mask;
+
+       if(!vinfo.v_depth)
+               return;
+
+       if ( vinfo.v_depth == 32 )
+               mask = 0x007F7F7F;
+       else if ( vinfo.v_depth == 30 )
+               mask = (0x1ff<<20) | (0x1ff<<10) | 0x1ff;
+       else if ( vinfo.v_depth == 16 )
+               mask = 0x3DEF3DEF;
+       else
+               return;
+
+       rowline = (int)(vinfo.v_rowscanbytes / 4);
+       rowlongs = (int)(vinfo.v_rowbytes / 4);
+
+       p = (unsigned int*) vinfo.v_baseaddr;
+       endp = p + (rowlongs * vinfo.v_height);
+
+       for (row = p ; row < endp ; row += rowlongs) {
+               for (p = &row[0], col = 0; col < rowline; col++) {
+                       *p = (*p >> 1) & mask;
+                       ++p;
+               }
+       }
+}
+
 void vcattach(void); /* XXX gcc 4 warning cleanup */
 
 void
index 204a85ab6412c98e3bb05d7fe0dd037d5d8b038d..c623ba72eb04b17f7fe44a98a2add6d50e1142e1 100644 (file)
@@ -155,7 +155,7 @@ typedef struct _cframe_t {
 static unsigned panic_io_port;
 static unsigned        commit_paniclog_to_nvram;
 
-int debug_boot_arg;
+unsigned int debug_boot_arg;
 
 void
 machine_startup(void)
@@ -167,13 +167,14 @@ machine_startup(void)
             halt_in_debugger = halt_in_debugger ? 0 : 1;
 #endif
 
-       if (PE_parse_boot_argn("debug", &boot_arg, sizeof (boot_arg))) {
-               if (boot_arg & DB_HALT) halt_in_debugger=1;
-               if (boot_arg & DB_PRT) disable_debug_output=FALSE; 
-               if (boot_arg & DB_SLOG) systemLogDiags=TRUE; 
-               if (boot_arg & DB_NMI) panicDebugging=TRUE; 
-               if (boot_arg & DB_LOG_PI_SCRN) logPanicDataToScreen=TRUE;
-               debug_boot_arg = boot_arg;
+       if (PE_parse_boot_argn("debug", &debug_boot_arg, sizeof (debug_boot_arg))) {
+               if (debug_boot_arg & DB_HALT) halt_in_debugger=1;
+               if (debug_boot_arg & DB_PRT) disable_debug_output=FALSE; 
+               if (debug_boot_arg & DB_SLOG) systemLogDiags=TRUE; 
+               if (debug_boot_arg & DB_NMI) panicDebugging=TRUE; 
+               if (debug_boot_arg & DB_LOG_PI_SCRN) logPanicDataToScreen=TRUE;
+       } else {
+               debug_boot_arg = 0;
        }
 
        if (!PE_parse_boot_argn("nvram_paniclog", &commit_paniclog_to_nvram, sizeof (commit_paniclog_to_nvram)))
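
The rewrite above parses the "debug" boot-arg directly into the global (now unsigned) and explicitly zeroes it when the argument is absent, so consumers elsewhere can test DB_* bits without reading a stale value. A sketch of the pattern; the flag values and parse_boot_arg_sketch() are stand-ins, not xnu's:

    #include <stdbool.h>

    #define DB_HALT_SK 0x1        /* illustrative values only */
    #define DB_PRT_SK  0x2

    unsigned int debug_flags;     /* plays the role of debug_boot_arg */

    /* stand-in for PE_parse_boot_argn(); reports whether the arg exists */
    static bool
    parse_boot_arg_sketch(const char *name, unsigned int *out)
    {
        (void)name; (void)out;
        return false;             /* stubbed: pretend the arg is absent */
    }

    static void
    parse_debug_flags(void)
    {
        if (!parse_boot_arg_sketch("debug", &debug_flags))
            debug_flags = 0;      /* absent must read as "no flags" */
        if (debug_flags & DB_HALT_SK) { /* halt in debugger ... */ }
        if (debug_flags & DB_PRT_SK)  { /* enable debug output ... */ }
    }
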
@@ -714,13 +715,11 @@ panic_io_port_read(void) {
 /* For use with the MP rendezvous mechanism
  */
 
-#if !CONFIG_EMBEDDED
 static void
 machine_halt_cpu(__unused void *arg) {
        panic_io_port_read();
        pmCPUHalt(PM_HALT_DEBUG);
 }
-#endif
 
 void
 Debugger(
@@ -762,7 +761,7 @@ Debugger(
 #endif
 
                /* Print backtrace - callee is internally synchronized */
-               panic_i386_backtrace(stackptr, 20, NULL, FALSE, NULL);
+               panic_i386_backtrace(stackptr, 32, NULL, FALSE, NULL);
 
                /* everything should be printed now so copy to NVRAM
                 */
@@ -819,23 +818,28 @@ Debugger(
                        }
                     }
                 }
-               draw_panic_dialog();
+
+               /* If the user won't be able to read the dialog,
+                * don't bother trying to show it
+                */
+               if (!PE_reboot_on_panic())
+                       draw_panic_dialog();
 
                if (!panicDebugging) {
                        /* Clear the MP rendezvous function lock, in the event
                         * that a panic occurred while in that codepath.
                         */
                        mp_rendezvous_break_lock();
-#if CONFIG_EMBEDDED
-                       PEHaltRestart(kPEPanicRestartCPU);
-#else
+                       if (PE_reboot_on_panic()) {
+                               PEHaltRestart(kPEPanicRestartCPU);
+                       }
+
                        /* Force all CPUs to disable interrupts and HLT.
                         * We've panicked, and shouldn't depend on the
                         * PEHaltRestart() mechanism, which relies on several
                         * bits of infrastructure.
                         */
                        mp_rendezvous_no_intrs(machine_halt_cpu, NULL);
-#endif
                        /* NOT REACHED */
                }
         }
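
Both hunks above trade a compile-time #if CONFIG_EMBEDDED split for the runtime predicate PE_reboot_on_panic(): skip drawing a dialog nobody will see, restart if configured to, and otherwise fall through to halting every CPU. The shape of that logic as a sketch, with stand-in routines:

    #include <stdbool.h>

    static bool reboot_on_panic(void) { return false; } /* stand-in for PE_reboot_on_panic() */
    static void restart_cpu(void)     { }               /* stand-in for PEHaltRestart()      */
    static void draw_dialog(void)     { }
    static void halt_all_cpus(void)   { for (;;) ; }    /* stand-in for the HLT rendezvous   */

    static void
    panic_stop(bool panic_debugging)
    {
        if (!reboot_on_panic())
            draw_dialog();        /* only if someone can read it */
        if (!panic_debugging) {
            if (reboot_on_panic())
                restart_cpu();    /* may not return */
            halt_all_cpus();      /* NOT REACHED */
        }
    }
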
index 470e8a3e73cdbb347f21c606f32d0ecb03989144..58791ecb83a7b5e7a803384f06c64f5698c21cce 100644 (file)
@@ -52,7 +52,6 @@
 #define        k64Bit                          0x00000200      /* processor supports EM64T (not what mode you're running in) */
 #define        kHasSSE4_1                      0x00000400
 #define        kHasSSE4_2                      0x00000800
-#define        kHasAES                         0x00001000
 #define        kInOrderPipeline                0x00002000      /* in-order execution */
 #define        kSlow                           0x00004000      /* tsc < nanosecond */
 #define        kUP                             0x00008000      /* set if (kNumCPUs == 1) */
index c247a157df5b7f6459589a9dd4a53d2b29e51bc4..1ddb1469e0b645033961add91e439f5aa1c77af9 100644 (file)
@@ -573,6 +573,7 @@ cpuid_set_generic_info(i386_cpu_info_t *info_p)
                cpuid_fn(6, reg);
                ctp->sensor               = bitfield32(reg[eax], 0, 0);
                ctp->dynamic_acceleration = bitfield32(reg[eax], 1, 1);
+               ctp->invariant_APIC_timer = bitfield32(reg[eax], 2, 2);
                ctp->thresholds           = bitfield32(reg[ebx], 3, 0);
                ctp->ACNT_MCNT            = bitfield32(reg[ecx], 0, 0);
                info_p->cpuid_thermal_leafp = ctp;
@@ -727,9 +728,9 @@ static struct {
 extfeature_map[] = {
        {CPUID_EXTFEATURE_SYSCALL, "SYSCALL"},
        {CPUID_EXTFEATURE_XD,      "XD"},
+       {CPUID_EXTFEATURE_RDTSCP,  "RDTSCP"},
        {CPUID_EXTFEATURE_EM64T,   "EM64T"},
        {CPUID_EXTFEATURE_LAHF,    "LAHF"},
-       {CPUID_EXTFEATURE_RDTSCP,  "RDTSCP"},
        {CPUID_EXTFEATURE_TSCI,    "TSCI"},
        {0, 0}
 };
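
The new invariant_APIC_timer field is bit 2 of CPUID leaf 6's EAX (the "always running APIC timer" bit). Assuming bitfield32(word, msb, lsb) extracts bits msb..lsb inclusive, which is consistent with every field pulled out above, a sketch:

    #include <stdint.h>

    /* Extract bits msb..lsb (inclusive) of a 32-bit word; valid for
     * spans narrower than 32 bits.  Assumed to match the bitfield32()
     * helper the hunk relies on. */
    static uint32_t
    bitfield32_sketch(uint32_t word, int msb, int lsb)
    {
        return (word >> lsb) & ((1u << (msb - lsb + 1)) - 1u);
    }

    /* invariant_APIC_timer = bitfield32_sketch(eax, 2, 2);  CPUID.06H:EAX bit 2 */
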
index 135ededc36f7ef0122995c6d47e9d11bf7c20c4a..32b07e12ab4a6440a42720de7ee774ec3fb83ce8 100644 (file)
@@ -84,6 +84,7 @@
 #define CPUID_FEATURE_PBE     _Bit(31) /* Pend Break Enable */
 
 #define CPUID_FEATURE_SSE3    _HBit(0) /* Streaming SIMD extensions 3 */
+
 #define CPUID_FEATURE_MONITOR _HBit(3) /* Monitor/mwait */
 #define CPUID_FEATURE_DSCPL   _HBit(4) /* Debug Store CPL */
 #define CPUID_FEATURE_VMX     _HBit(5) /* VMX */
@@ -95,6 +96,7 @@
 #define CPUID_FEATURE_CX16    _HBit(13)        /* CmpXchg16b instruction */
 #define CPUID_FEATURE_xTPR    _HBit(14)        /* Send Task PRiority msgs */
 #define CPUID_FEATURE_PDCM    _HBit(15)        /* Perf/Debug Capability MSR */
+
 #define CPUID_FEATURE_DCA     _HBit(18)        /* Direct Cache Access */
 #define CPUID_FEATURE_SSE4_1  _HBit(19)        /* Streaming SIMD extensions 4.1 */
 #define CPUID_FEATURE_SSE4_2  _HBit(20)        /* Streaming SIMD extensions 4.2 */
@@ ... @@
  */
 #define CPUID_EXTFEATURE_SYSCALL   _Bit(11)    /* SYSCALL/sysret */
 #define CPUID_EXTFEATURE_XD       _Bit(20)     /* eXecute Disable */
+
 #define CPUID_EXTFEATURE_RDTSCP           _Bit(27)     /* RDTSCP */
 #define CPUID_EXTFEATURE_EM64T    _Bit(29)     /* Extended Mem 64 Technology */
 
-#define CPUID_EXTFEATURE_LAHF     _HBit(20)    /* LAFH/SAHF instructions */
+#define CPUID_EXTFEATURE_LAHF     _HBit(0)     /* LAHF/SAHF instructions */
 
 /*
  * The CPUID_EXTFEATURE_XXX values define 64-bit values
@@ ... @@
 #define CPUID_MODEL_MEROM      15
 #define CPUID_MODEL_PENRYN     23
 #define CPUID_MODEL_NEHALEM    26
-#define CPUID_MODEL_ATOM       28
 #define CPUID_MODEL_FIELDS     30      /* Lynnfield, Clarksfield, Jasper */
 #define CPUID_MODEL_DALES      31      /* Havendale, Auburndale */
 #define CPUID_MODEL_NEHALEM_EX 46
@@ -200,6 +202,7 @@ typedef struct {
 typedef struct {
        boolean_t       sensor;
        boolean_t       dynamic_acceleration;
+       boolean_t       invariant_APIC_timer;
        uint32_t        thresholds;
        boolean_t       ACNT_MCNT;
 } cpuid_thermal_leaf_t;
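
The quiet fix above matters: LAHF/SAHF support is reported in CPUID.80000001H:ECX bit 0, not bit 20, so CPUID_EXTFEATURE_LAHF moves to _HBit(0). Assuming this header's convention of packing EDX into the low word and ECX into the high word of one 64-bit feature value, a sketch of the corrected test:

    #include <stdint.h>

    #define BIT_SK(n)   (1ULL << (n))        /* low word:  EDX bits */
    #define HBIT_SK(n)  (1ULL << ((n) + 32)) /* high word: ECX bits */

    #define EXTFEATURE_LAHF_SK HBIT_SK(0)    /* the corrected position */

    static int
    has_lahf(uint64_t extfeatures)
    {
        return (extfeatures & EXTFEATURE_LAHF_SK) != 0;
    }
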
index 0206d098697420782370ee646559ea0b8a894281..21e974bff2763ca898cbcd4ac78d4bb97bb804b5 100644 (file)
@@ -37,6 +37,7 @@
 #include <kern/cpu_data.h>
 #include <kern/assert.h>
 #include <kern/machine.h>
+#include <kern/debug.h>
 
 #include <vm/vm_map.h>
 #include <vm/vm_kern.h>
@@ -90,12 +91,11 @@ static unsigned lapic_master_error_count = 0;
 static unsigned lapic_error_count_threshold = 5;
 static boolean_t lapic_dont_panic = FALSE;
 
-extern int     debug_boot_arg;
-
 /* Base vector for local APIC interrupt sources */
 int lapic_interrupt_base = LAPIC_DEFAULT_INTERRUPT_BASE;
 
-int            lapic_to_cpu[MAX_CPUS];
+#define                MAX_LAPICIDS    (LAPIC_ID_MAX+1)
+int            lapic_to_cpu[MAX_LAPICIDS];
 int            cpu_to_lapic[MAX_CPUS];
 
 static void
@@ -103,15 +103,17 @@ lapic_cpu_map_init(void)
 {
        int     i;
 
-       for (i = 0; i < MAX_CPUS; i++) {
-               lapic_to_cpu[i] = -1;
+       for (i = 0; i < MAX_CPUS; i++)
                cpu_to_lapic[i] = -1;
-       }
+       for (i = 0; i < MAX_LAPICIDS; i++)
+               lapic_to_cpu[i] = -1;
 }
 
 void
 lapic_cpu_map(int apic_id, int cpu)
 {
+       assert(apic_id < MAX_LAPICIDS);
+       assert(cpu < MAX_CPUS);
        cpu_to_lapic[cpu] = apic_id;
        lapic_to_cpu[apic_id] = cpu;
 }
@@ -137,7 +139,7 @@ ml_get_apicid(uint32_t cpu)
 uint32_t
 ml_get_cpuid(uint32_t lapic_index)
 {
-       if(lapic_index >= (uint32_t)MAX_CPUS)
+       if(lapic_index >= (uint32_t)MAX_LAPICIDS)
 		return 0xFFFFFFFF;	/* Return -1 if the lapic index is out of range */
        
        /* Return the cpu ID (or -1 if not configured) */
@@ -158,7 +160,7 @@ lapic_cpu_map_dump(void)
                kprintf("cpu_to_lapic[%d]: %d\n",
                        i, cpu_to_lapic[i]);
        }
-       for (i = 0; i < MAX_CPUS; i++) {
+       for (i = 0; i < MAX_LAPICIDS; i++) {
                if (lapic_to_cpu[i] == -1)
                        continue;
                kprintf("lapic_to_cpu[%d]: %d\n",
index 6df816c557bfa57eb0540d3685d0329d857dac76..27f1fb52bf573198cc8a8a328d8495e164104061 100644 (file)
@@ -786,10 +786,6 @@ copyio(int copy_type, user_addr_t user_addr, char *kernel_addr,
        }
         pmap = thread->map->pmap;
 
-#if CONFIG_DTRACE
-       thread->machine.specFlags |= CopyIOActive;
-#endif /* CONFIG_DTRACE */
-
         if (pmap == kernel_pmap || use_kernel_map) {
 
                kern_vaddr = (vm_offset_t)user_addr;
@@ -819,13 +815,18 @@ copyio(int copy_type, user_addr_t user_addr, char *kernel_addr,
                KERNEL_DEBUG(debug_type | DBG_FUNC_END, (unsigned)kern_vaddr,
                             (unsigned)kernel_addr, (unsigned)nbytes,
                             error | 0x80000000, 0);
+               return (error);
+       }
 
 #if CONFIG_DTRACE
-       thread->machine.specFlags &= ~CopyIOActive;
+       thread->machine.specFlags |= CopyIOActive;
 #endif /* CONFIG_DTRACE */
 
-               return (error);
+       if ((nbytes && (user_addr + nbytes <= user_addr)) || ((user_addr + nbytes) > vm_map_max(thread->map))) {
+               error = EFAULT;
+               goto done;
        }
+
        user_base = user_addr & ~((user_addr_t)(NBPDE - 1));
        user_offset = (vm_offset_t)(user_addr & (NBPDE - 1));
 
@@ -1029,6 +1030,8 @@ copyio_phys(addr64_t source, addr64_t sink, vm_size_t csize, int which)
        }
        window_offset = (char *)((uint32_t)paddr & (PAGE_SIZE - 1));
 
+       assert(!((current_thread()->machine.specFlags & CopyIOActive) && ((which & cppvKmap) == 0)));
+
        if (current_thread()->machine.physwindow_busy) {
                pt_entry_t      old_pentry;
 
index 56fe44b1722b903e98ddc86360c4685c7a5605e5..0efbb917c4609c40fa58e420d71e24afee2cda19 100644 (file)
@@ -113,8 +113,8 @@ machine_idle(void)
 
     if (pmInitDone
        && pmDispatch != NULL
-       && pmDispatch->cstateMachineIdle != NULL)
-       (*pmDispatch->cstateMachineIdle)(0x7FFFFFFFFFFFFFFFULL);
+       && pmDispatch->MachineIdle != NULL)
+       (*pmDispatch->MachineIdle)(0x7FFFFFFFFFFFFFFFULL);
     else {
        /*
         * If no power management, re-enable interrupts and halt.
@@ -562,8 +562,10 @@ machine_run_count(uint32_t count)
 }
 
 boolean_t
-machine_cpu_is_inactive(int cpu)
+machine_processor_is_inactive(processor_t processor)
 {
+    int                cpu = processor->cpu_id;
+
     if (pmDispatch != NULL
        && pmDispatch->pmIsCPUUnAvailable != NULL)
        return(pmDispatch->pmIsCPUUnAvailable(cpu_to_lcpu(cpu)));
@@ -571,6 +573,43 @@ machine_cpu_is_inactive(int cpu)
        return(FALSE);
 }
 
+processor_t
+machine_choose_processor(processor_set_t pset,
+                        processor_t preferred)
+{
+    int                startCPU;
+    int                endCPU;
+    int                preferredCPU;
+    int                chosenCPU;
+
+    if (!pmInitDone)
+       return(preferred);
+
+    if (pset == NULL) {
+       startCPU = -1;
+       endCPU = -1;
+    } else {
+       startCPU = pset->cpu_set_low;
+       endCPU = pset->cpu_set_hi;
+    }
+
+    if (preferred == NULL)
+       preferredCPU = -1;
+    else
+       preferredCPU = preferred->cpu_id;
+
+    if (pmDispatch != NULL
+       && pmDispatch->pmChooseCPU != NULL) {
+       chosenCPU = pmDispatch->pmChooseCPU(startCPU, endCPU, preferredCPU);
+
+       if (chosenCPU == -1)
+           return(NULL);
+       return(cpu_datap(chosenCPU)->cpu_processor);
+    }
+
+    return(preferred);
+}
+
 static uint32_t
 pmGetSavedRunCount(void)
 {
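
machine_processor_is_inactive() now takes a processor_t, and the new machine_choose_processor() lets the power-management plugin steer placement: it may pick a CPU within the pset's [startCPU, endCPU] range, veto placement by returning -1 (mapped to NULL), or defer to the scheduler's preference. The delegation pattern as a sketch, names illustrative:

    #include <stddef.h>

    typedef int (*choose_cpu_fn)(int start, int end, int preferred);

    static choose_cpu_fn pm_choose_cpu; /* NULL until a PM plugin registers */

    /* Returns a cpu id, the caller's preference when no plugin is loaded,
     * or -1 when the plugin declines every candidate. */
    static int
    choose_cpu_sketch(int start, int end, int preferred)
    {
        if (pm_choose_cpu != NULL)
            return pm_choose_cpu(start, end, preferred);
        return preferred;
    }
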
index 5609df50e3860165846b454a724fd61c6a09d896..ff67de670da85b1f1095733299902a8fa884a57e 100644 (file)
@@ -38,7 +38,7 @@
  * This value should be changed each time that pmDispatch_t or pmCallBacks_t
  * changes.
  */
-#define PM_DISPATCH_VERSION    18
+#define PM_DISPATCH_VERSION    19
 
 /*
  * Dispatch table for functions that get installed when the power
@@ -54,7 +54,7 @@ typedef struct
 {
     int                        (*pmCPUStateInit)(void);
     void               (*cstateInit)(void);
-    uint64_t           (*cstateMachineIdle)(uint64_t maxIdleDuration);
+    uint64_t           (*MachineIdle)(uint64_t maxIdleDuration);
     uint64_t           (*GetDeadline)(x86_lcpu_t *lcpu);
     uint64_t           (*SetDeadline)(x86_lcpu_t *lcpu, uint64_t);
     void               (*Deadline)(x86_lcpu_t *lcpu);
@@ -75,6 +75,7 @@ typedef struct
     void               (*markAllCPUsOff)(void);
     void               (*pmSetRunCount)(uint32_t count);
     boolean_t          (*pmIsCPUUnAvailable)(x86_lcpu_t *lcpu);
+    int                        (*pmChooseCPU)(int startCPU, int endCPU, int preferredCPU);
     int                        (*pmIPIHandler)(void *state);
 } pmDispatch_t;
 
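
Appending pmChooseCPU to pmDispatch_t changes the structure's layout, hence the bump to version 19: a power-management kext built against version 18 must be refused rather than called through a misaligned table. A sketch of such a version handshake; register_pm_sketch() is hypothetical, not xnu's API:

    #include <stddef.h>

    #define DISPATCH_VERSION_SK 19

    typedef struct {
        int (*choose_cpu)(int, int, int);
        /* ... remaining callbacks ... */
    } dispatch_sk_t;

    static dispatch_sk_t *dispatch_sk;

    static int
    register_pm_sketch(unsigned int version, dispatch_sk_t *tbl)
    {
        if (version != DISPATCH_VERSION_SK)
            return -1;       /* stale plugin: layouts disagree */
        dispatch_sk = tbl;
        return 0;
    }
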
index 311763f1fbe61db08dd8178d9b154b5adef7ebb0..e7135803a95343ebf4eff3c910988db77b1d2677 100644 (file)
@@ -89,7 +89,6 @@
  */
 
 #include <string.h>
-#include <norma_vm.h>
 #include <mach_kdb.h>
 #include <mach_ldebug.h>
 
@@ -219,143 +218,10 @@ boolean_t pmap_trace = FALSE;
 uint64_t max_preemption_latency_tsc = 0;
 
 
-/*
- *     Private data structures.
- */
-
-/*
- *     For each vm_page_t, there is a list of all currently
- *     valid virtual mappings of that page.  An entry is
- *     a pv_rooted_entry_t; the list is the pv_table.
- *
- *      N.B.  with the new combo rooted/hashed scheme it is
- *      only possibly to remove individual non-rooted entries
- *      only possible to remove individual non-rooted entries
- *      way to unlink the singly linked hashed entries if navigated to
- *      via the queue list off the rooted entries.  Think of it as
- *      hash/walk/pull, keeping track of the prev pointer while walking
- *      the singly linked hash list.  All of this is to save memory and
- *      keep both types of pv_entries as small as possible.
- */
-
-/*
-
-PV HASHING Changes - JK 1/2007
-
-Pve's establish physical to virtual mappings.  These are used for aliasing of a 
-physical page to (potentially many) virtual addresses within pmaps. In the previous 
-implementation the structure of the pv_entries (each 16 bytes in size) was
-
-typedef struct pv_entry {
-    struct pv_entry_t    next;
-    pmap_t                    pmap;
-    vm_map_offset_t   va;
-} *pv_entry_t;
-
-An initial array of these is created at boot time, one per physical page of memory, 
-indexed by the physical page number. Additionally, a pool of entries is created from a 
-pv_zone to be used as needed by pmap_enter() when it is creating new mappings.  
-Originally, we kept this pool around because the code in pmap_enter() was unable to 
-block if it needed an entry and none were available - we'd panic.  Some time ago I 
-restructured the pmap_enter() code so that for user pmaps it can block while zalloc'ing 
-a pv structure and restart, removing a panic from the code (in the case of the kernel 
-pmap we cannot block and still panic, so, we keep a separate hot pool for use only on 
-kernel pmaps).  The pool has not been removed since there is a large performance gain 
-keeping freed pv's around for reuse and not suffering the overhead of zalloc for every new pv we need.
-
-As pmap_enter() created new mappings it linked the new pve's for them off the fixed 
-pv array for that ppn (off the next pointer).  These pve's are accessed for several 
-operations, one of them being address space teardown.  In that case, we basically do this
-
-       for (every page/pte in the space) {
-               calc pve_ptr from the ppn in the pte
-               for (every pv in the list for the ppn) {
-                       if (this pv is for this pmap/vaddr) {
-                               do housekeeping
-                               unlink/free the pv
-                       }
-               }
-       }
-
-The problem arose when we were running, say 8000 (or even 2000) apache or other processes 
-and one or all terminate. The list hanging off each pv array entry could have thousands of 
-entries.  We were continuously linearly searching each of these lists as we stepped through 
-the address space we were tearing down.  Because of the locks we held, the likely cache 
-miss on each node, and the interrupts disabled for MP safety, the system became completely 
-unresponsive for many seconds while we did this.
-
-Realizing that pve's are accessed in two distinct ways (linearly running the list by ppn 
-for operations like pmap_page_protect and finding and modifying/removing a single pve as 
-part of pmap_enter processing) has led to modifying the pve structures and databases.
-
-There are now two types of pve structures.  A "rooted" structure which is basically the 
-original structure accessed in an array by ppn, and a "hashed" structure accessed on a 
-hash list via a hash of [pmap, vaddr].  These have been designed with the two goals of 
-minimizing wired memory and making the lookup of a ppn faster.  Since a vast majority of 
-pages in the system are not aliased and hence represented by a single pv entry I've kept 
-the rooted entry size as small as possible because there is one of these dedicated for 
-every physical page of memory.  The hashed pve's are larger due to the addition of the hash 
-link and the ppn entry needed for matching while running the hash list to find the entry we 
-are looking for.  This way, only systems that have lots of aliasing (like 2000+ httpd procs) 
-will pay the extra memory price. Both structures have the same first three fields allowing 
-some simplification in the code.
-
-They have these shapes
-
-typedef struct pv_rooted_entry {
-        queue_head_t qlink;
-        vm_map_offset_t va;
-        pmap_t          pmap;
-} *pv_rooted_entry_t;
-
-
-typedef struct pv_hashed_entry {
-  queue_head_t qlink;
-  vm_map_offset_t va;
-  pmap_t        pmap;
-  ppnum_t ppn;
-  struct pv_hashed_entry *nexth;
-} *pv_hashed_entry_t;
-
-The main flow difference is that the code is now aware of the rooted entry and the hashed 
-entries.  Code that runs the pv list still starts with the rooted entry and then continues 
-down the qlink onto the hashed entries.  Code that is looking up a specific pv entry first 
-checks the rooted entry and then hashes and runs the hash list for the match. The hash list 
-lengths are much smaller than the original pv lists that contained all aliases for the specific ppn.
-
-*/
-
-typedef struct pv_rooted_entry {     /* first three entries must match pv_hashed_entry_t */
-        queue_head_t qlink;
-       vm_map_offset_t va;             /* virtual address for mapping */
-       pmap_t          pmap;           /* pmap where mapping lies */
-} *pv_rooted_entry_t;
-
-#define PV_ROOTED_ENTRY_NULL   ((pv_rooted_entry_t) 0)
-
-pv_rooted_entry_t      pv_head_table;          /* array of entries, one per page */
-
-typedef struct pv_hashed_entry {     /* first three entries must match pv_rooted_entry_t */
-  queue_head_t qlink;
-  vm_map_offset_t va;
-  pmap_t        pmap;
-  ppnum_t ppn;
-  struct pv_hashed_entry *nexth;
-} *pv_hashed_entry_t;
-
-#define PV_HASHED_ENTRY_NULL ((pv_hashed_entry_t)0)
-
-#define NPVHASH 4095   /* MUST BE 2^N - 1 */
 pv_hashed_entry_t     *pv_hash_table;  /* hash lists */
 
 uint32_t npvhash = 0;
 
-/* #define PV_DEBUG 1   uncomment to enable some PV debugging code */
-#ifdef PV_DEBUG
-#define CHK_NPVHASH() if(0 == npvhash) panic("npvhash uninitialized");
-#else
-#define CHK_NPVHASH()
-#endif
 
 /*
  *     pv_list entries are kept on a list that can only be accessed
@@ -373,53 +239,6 @@ int pv_free_count = 0;
 int pv_hashed_free_count = 0;
 int pv_kern_free_count = 0;
 int pv_hashed_kern_free_count = 0;
-#define PV_HASHED_LOW_WATER_MARK 5000
-#define PV_HASHED_KERN_LOW_WATER_MARK 100
-#define PV_HASHED_ALLOC_CHUNK 2000
-#define PV_HASHED_KERN_ALLOC_CHUNK 50
-thread_call_t  mapping_adjust_call;
-static thread_call_data_t  mapping_adjust_call_data;
-uint32_t mappingrecurse = 0;
-
-#define        PV_HASHED_ALLOC(pvh_e) { \
-       simple_lock(&pv_hashed_free_list_lock); \
-       if ((pvh_e = pv_hashed_free_list) != 0) { \
-         pv_hashed_free_list = (pv_hashed_entry_t)pvh_e->qlink.next;   \
-            pv_hashed_free_count--; \
-            if (pv_hashed_free_count < PV_HASHED_LOW_WATER_MARK) \
-              if (hw_compare_and_store(0,1,(u_int *)&mappingrecurse)) \
-                thread_call_enter(mapping_adjust_call); \
-       } \
-       simple_unlock(&pv_hashed_free_list_lock); \
-}
-
-#define        PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt) {   \
-       simple_lock(&pv_hashed_free_list_lock); \
-       pvh_et->qlink.next = (queue_entry_t)pv_hashed_free_list;        \
-       pv_hashed_free_list = pvh_eh; \
-        pv_hashed_free_count += pv_cnt; \
-       simple_unlock(&pv_hashed_free_list_lock); \
-}
-
-#define        PV_HASHED_KERN_ALLOC(pvh_e) { \
-       simple_lock(&pv_hashed_kern_free_list_lock); \
-       if ((pvh_e = pv_hashed_kern_free_list) != 0) { \
-         pv_hashed_kern_free_list = (pv_hashed_entry_t)pvh_e->qlink.next;      \
-            pv_hashed_kern_free_count--; \
-            if (pv_hashed_kern_free_count < PV_HASHED_KERN_LOW_WATER_MARK) \
-              if (hw_compare_and_store(0,1,(u_int *)&mappingrecurse)) \
-                thread_call_enter(mapping_adjust_call); \
-       } \
-       simple_unlock(&pv_hashed_kern_free_list_lock); \
-}
-
-#define        PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt) {       \
-       simple_lock(&pv_hashed_kern_free_list_lock); \
-       pvh_et->qlink.next = (queue_entry_t)pv_hashed_kern_free_list;   \
-       pv_hashed_kern_free_list = pvh_eh; \
-        pv_hashed_kern_free_count += pv_cnt; \
-       simple_unlock(&pv_hashed_kern_free_list_lock); \
-}
 
 zone_t         pv_hashed_list_zone;    /* zone of pv_hashed_entry structures */
 
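
The design comment and the allocation macros removed above leave pmap.c in this commit, but the scheme they describe is worth a compact restatement: a lookup tries the per-page rooted entry first (the common, unaliased case) and only then hashes [pmap, va] and walks a short chain. A self-contained sketch of the hashed half, pared down from the removed definitions (PAGE_SHIFT assumed to be 12):

    #include <stddef.h>
    #include <stdint.h>

    #define NPVHASH_SK 4095   /* must be 2^n - 1, as the removed macro notes */

    typedef struct pv_hashed_sk {
        struct pv_hashed_sk *nexth;
        void                *pmap;
        uint64_t             va;
        uint32_t             ppn;
    } pv_hashed_sk_t;

    static pv_hashed_sk_t *pv_hash_tbl_sk[NPVHASH_SK + 1];

    /* xor of the pmap pointer and the page number, masked to table size;
     * same shape as the removed pvhashidx() macro. */
    static uint32_t
    pvhashidx_sk(void *pmap, uint64_t va)
    {
        return ((uint32_t)(uintptr_t)pmap ^ (uint32_t)(va >> 12)) & NPVHASH_SK;
    }

    static pv_hashed_sk_t *
    pv_lookup_sk(void *pmap, uint64_t va, uint32_t ppn)
    {
        pv_hashed_sk_t *e;

        for (e = pv_hash_tbl_sk[pvhashidx_sk(pmap, va)]; e != NULL; e = e->nexth)
            if (e->pmap == pmap && e->va == va && e->ppn == ppn)
                return e;   /* aliased mapping found on the chain */
        return NULL;        /* caller already checked the rooted entry */
    }
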
@@ -447,23 +266,6 @@ boolean_t  pmap_initialized = FALSE;/* Has pmap_init completed? */
 static struct vm_object kptobj_object_store;
 static vm_object_t kptobj;
 
-/*
- *     Index into pv_head table, its lock bits, and the modify/reference and managed bits
- */
-
-#define pa_index(pa)   (i386_btop(pa))
-#define ppn_to_pai(ppn)        ((int)ppn)
-
-#define pai_to_pvh(pai)                (&pv_head_table[pai])
-#define lock_pvh_pai(pai)      bit_lock(pai, (void *)pv_lock_table)
-#define unlock_pvh_pai(pai)    bit_unlock(pai, (void *)pv_lock_table)
-
-#define pvhashidx(pmap, va) (((uint32_t)pmap ^ ((uint32_t)((uint64_t)va >> PAGE_SHIFT) & 0xFFFFFFFF)) & npvhash)
-#define pvhash(idx)         (&pv_hash_table[idx])
-
-#define lock_hash_hash(hash)           bit_lock(hash, (void *)pv_hash_lock_table)
-#define unlock_hash_hash(hash) bit_unlock(hash, (void *)pv_hash_lock_table)
-
 /*
 *	Array of physical page attributes for managed pages.
  *     One byte per physical page.
@@ -596,44 +398,6 @@ static int nkpt;
 pt_entry_t     *DMAP1, *DMAP2;
 caddr_t         DADDR1;
 caddr_t         DADDR2;
-
-static inline
-void pmap_pvh_unlink(pv_hashed_entry_t pv);
-
-/*
- * unlinks the pv_hashed_entry_t pvh from the singly linked hash chain.
- * properly deals with the anchor.
- * must be called with the hash locked, does not unlock it
- */
-
-static inline
-void pmap_pvh_unlink(pv_hashed_entry_t pvh)
-{
-  pv_hashed_entry_t curh;
-  pv_hashed_entry_t *pprevh;
-  int pvhash_idx;
-
-  CHK_NPVHASH();
-  pvhash_idx = pvhashidx(pvh->pmap, pvh->va);
-
-  pprevh = pvhash(pvhash_idx);
-
-#if PV_DEBUG
-  if (NULL == *pprevh) panic("pvh_unlink null anchor"); /* JK DEBUG */
-#endif
-  curh = *pprevh;
-
-  while (PV_HASHED_ENTRY_NULL != curh) {
-    if (pvh == curh)
-      break;
-    pprevh = &curh->nexth;
-    curh = curh->nexth;
-  }
-  if (PV_HASHED_ENTRY_NULL == curh) panic("pmap_pvh_unlink no pvh");
-  *pprevh = pvh->nexth;
-  return;
-}
-
 /*
  * for legacy, returns the address of the pde entry.
  * for 64 bit, causes the pdpt page containing the pde entry to be mapped,
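
The pmap_pvh_unlink() removed above is the classic pointer-to-pointer unlink for a singly linked chain: walk with the address of the previous link so the anchor slot needs no special case. The pattern in miniature:

    #include <stddef.h>

    struct node_sk {
        struct node_sk *next;
    };

    static void
    unlink_sk(struct node_sk **anchor, struct node_sk *victim)
    {
        struct node_sk **pp;

        for (pp = anchor; *pp != NULL; pp = &(*pp)->next) {
            if (*pp == victim) {
                *pp = victim->next; /* splice out; the head is no special case */
                return;
            }
        }
        /* the removed kernel code panics here: the entry must be on its chain */
    }
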
@@ -1550,7 +1314,7 @@ pmap_create(
                va = (vm_offset_t)p->dirbase;
                p->pdirbase = kvtophys(va);
 
-               template = cpu_64bit ? INTEL_PTE_VALID|INTEL_PTE_RW|INTEL_PTE_USER|INTEL_PTE_REF : INTEL_PTE_VALID;
+               template = INTEL_PTE_VALID;
                for (i = 0; i< NPGPTD; i++, pdpt++ ) {
                        pmap_paddr_t pa;
                        pa = (pmap_paddr_t) kvtophys((vm_offset_t)(va + i386_ptob(i)));
@@ -1588,7 +1352,7 @@ pmap_create(
                /* uber space points to uber mapped kernel */
                s = splhigh();
                pml4p = pmap64_pml4(p, 0ULL);
-               pmap_store_pte((pml4p+KERNEL_UBER_PML4_INDEX),*kernel_pmap->pm_pml4);
+               pmap_store_pte((pml4p+KERNEL_UBER_PML4_INDEX), *kernel_pmap->pm_pml4);
 
 
                if (!is_64bit) {
@@ -1815,231 +1579,6 @@ pmap_reference(
        }
 }
 
-/*
- *     Remove a range of hardware page-table entries.
- *     The entries given are the first (inclusive)
- *     and last (exclusive) entries for the VM pages.
- *     The virtual address is the va for the first pte.
- *
- *     The pmap must be locked.
- *     If the pmap is not the kernel pmap, the range must lie
- *     entirely within one pte-page.  This is NOT checked.
- *     Assumes that the pte-page exists.
- */
-
-void
-pmap_remove_range(
-       pmap_t                  pmap,
-       vm_map_offset_t         start_vaddr,
-       pt_entry_t              *spte,
-       pt_entry_t              *epte)
-{
-       register pt_entry_t     *cpte;
-       pv_hashed_entry_t       pvh_et = PV_HASHED_ENTRY_NULL;
-       pv_hashed_entry_t       pvh_eh = PV_HASHED_ENTRY_NULL;
-       pv_hashed_entry_t       pvh_e;
-       int                     pvh_cnt = 0;
-       int                     num_removed, num_unwired, num_found;
-       int                     pai;
-       pmap_paddr_t            pa;
-       vm_map_offset_t         vaddr;
-       int                     pvhash_idx;
-       uint32_t                pv_cnt;
-
-       num_removed = 0;
-       num_unwired = 0;
-       num_found   = 0;
-
-       if (pmap != kernel_pmap &&
-           pmap->pm_task_map == TASK_MAP_32BIT &&
-           start_vaddr >= HIGH_MEM_BASE) {
-               /*
-                * The range is in the "high_shared_pde" which is shared
-                * between the kernel and all 32-bit tasks.  It holds
-                * the 32-bit commpage but also the trampolines, GDT, etc...
-                * so we can't let user tasks remove anything from it.
-                */
-               return;
-       }
-
-       /* invalidate the PTEs first to "freeze" them */
-       for (cpte = spte, vaddr = start_vaddr;
-            cpte < epte;
-            cpte++, vaddr += PAGE_SIZE_64) {
-
-           pa = pte_to_pa(*cpte);
-           if (pa == 0)
-               continue;
-           num_found++;
-
-           if (iswired(*cpte))
-               num_unwired++;
-
-           pai = pa_index(pa);
-
-           if (!managed_page(pai)) {
-               /*
-                *      Outside range of managed physical memory.
-                *      Just remove the mappings.
-                */
-               pmap_store_pte(cpte, 0);
-               continue;
-           }
-
-           /* invalidate the PTE */ 
-           pmap_update_pte(cpte, *cpte, (*cpte & ~INTEL_PTE_VALID));
-       }
-
-       if (num_found == 0) {
-               /* nothing was changed: we're done */
-               goto update_counts;
-       }
-
-       /* propagate the invalidates to other CPUs */
-
-       PMAP_UPDATE_TLBS(pmap, start_vaddr, vaddr);
-
-       for (cpte = spte, vaddr = start_vaddr;
-            cpte < epte;
-            cpte++, vaddr += PAGE_SIZE_64) {
-
-           pa = pte_to_pa(*cpte);
-           if (pa == 0)
-               continue;
-
-           pai = pa_index(pa);
-
-           LOCK_PVH(pai);
-
-           pa = pte_to_pa(*cpte);
-           if (pa == 0) {
-             UNLOCK_PVH(pai);
-             continue;
-           }
-             
-           num_removed++;
-
-           /*
-            *  Get the modify and reference bits, then
-            *  nuke the entry in the page table
-            */
-           /* remember reference and change */
-           pmap_phys_attributes[pai] |=
-                   (char)(*cpte & (PHYS_MODIFIED | PHYS_REFERENCED));
-           /* completely invalidate the PTE */
-           pmap_store_pte(cpte, 0);
-
-           /*
-            *  Remove the mapping from the pvlist for
-            *  this physical page.
-            */
-           {
-             pv_rooted_entry_t pv_h;
-             pv_hashed_entry_t *pprevh;
-             ppnum_t ppn = (ppnum_t)pai;
-
-               pv_h = pai_to_pvh(pai);
-               pvh_e = PV_HASHED_ENTRY_NULL;
-               if (pv_h->pmap == PMAP_NULL)
-                   panic("pmap_remove_range: null pv_list!");
-
-               if (pv_h->va == vaddr && pv_h->pmap == pmap) { /* rooted or not */
-                   /*
-                    * Header is the pv_rooted_entry. We can't free that. If there is a queued
-                    * entry after this one, we remove it from the ppn queue
-                    * and the hash chain, copy it to the rooted entry,
-                    * and free it instead.
-                    */
-
-                 pvh_e = (pv_hashed_entry_t)queue_next(&pv_h->qlink);
-                 if (pv_h != (pv_rooted_entry_t)pvh_e) {  /* any queued after rooted? */
-                   CHK_NPVHASH();
-                   pvhash_idx = pvhashidx(pvh_e->pmap,pvh_e->va);
-                   LOCK_PV_HASH(pvhash_idx);
-                   remque(&pvh_e->qlink);
-                   {
-                     pprevh = pvhash(pvhash_idx);
-                     if (PV_HASHED_ENTRY_NULL == *pprevh) {
-                       panic("pmap_remove_range empty hash removing rooted pv");
-                     }
-                   }
-                   pmap_pvh_unlink(pvh_e);
-                   UNLOCK_PV_HASH(pvhash_idx);
-                   pv_h->pmap = pvh_e->pmap;
-                   pv_h->va = pvh_e->va;   /* dispose of pvh_e */
-                 } else {  /* none queued after rooted */
-                   pv_h->pmap = PMAP_NULL;
-                   pvh_e = PV_HASHED_ENTRY_NULL;
-                 }   /* any queued after rooted */
-
-               } else { /* rooted or not */
-                 /* not removing rooted pv. find it on hash chain, remove from ppn queue and
-                  * hash chain and free it */
-                 CHK_NPVHASH();
-                 pvhash_idx = pvhashidx(pmap,vaddr);
-                 LOCK_PV_HASH(pvhash_idx);
-                 pprevh = pvhash(pvhash_idx);
-                 if (PV_HASHED_ENTRY_NULL == *pprevh) {
-                   panic("pmap_remove_range empty hash removing hashed pv");
-                   }
-                 pvh_e = *pprevh;
-                 pmap_pv_hashlist_walks++;
-                 pv_cnt = 0;
-                 while (PV_HASHED_ENTRY_NULL != pvh_e) {
-                       pv_cnt++;
-                       if (pvh_e->pmap == pmap && pvh_e->va == vaddr && pvh_e->ppn == ppn) break;
-                       pprevh = &pvh_e->nexth;
-                       pvh_e = pvh_e->nexth;
-                 }
-                 pmap_pv_hashlist_cnts += pv_cnt;
-                 if (pmap_pv_hashlist_max < pv_cnt) pmap_pv_hashlist_max = pv_cnt;
-                 if (PV_HASHED_ENTRY_NULL == pvh_e) panic("pmap_remove_range pv not on hash");
-                 *pprevh = pvh_e->nexth;
-                 remque(&pvh_e->qlink);
-                 UNLOCK_PV_HASH(pvhash_idx);
-
-               } /* rooted or not */
-
-               UNLOCK_PVH(pai);
-
-               if (pvh_e != PV_HASHED_ENTRY_NULL) {
-                 pvh_e->qlink.next = (queue_entry_t)pvh_eh;
-                 pvh_eh = pvh_e;
-
-                 if (pvh_et == PV_HASHED_ENTRY_NULL) {
-                   pvh_et = pvh_e;
-                 }
-
-                 pvh_cnt++;
-               }
-
-           } /* removing mappings for this phy page */
-       } /* for loop */
-       
-       if (pvh_eh != PV_HASHED_ENTRY_NULL) {
-           PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
-       }
-
-update_counts:
-       /*
-        *      Update the counts
-        */
-#if TESTING
-       if (pmap->stats.resident_count < num_removed)
-               panic("pmap_remove_range: resident_count");
-#endif
-       assert(pmap->stats.resident_count >= num_removed);
-       OSAddAtomic(-num_removed,  &pmap->stats.resident_count);
-
-#if TESTING
-       if (pmap->stats.wired_count < num_unwired)
-               panic("pmap_remove_range: wired_count");
-#endif
-       assert(pmap->stats.wired_count >= num_unwired);
-       OSAddAtomic(-num_unwired,  &pmap->stats.wired_count);
-
-       return;
-}
 
 /*
  *     Remove phys addr if mapped in specified map
@@ -2055,290 +1594,6 @@ pmap_remove_some_phys(
 
 }
 
-/*
- *     Remove the given range of addresses
- *     from the specified map.
- *
- *     It is assumed that the start and end are properly
- *     rounded to the hardware page size.
- */
-
-
-void
-pmap_remove(
-       pmap_t          map,
-       addr64_t        s64,
-       addr64_t        e64)
-{
-       pt_entry_t      *pde;
-       pt_entry_t      *spte, *epte;
-       addr64_t        l64;
-       addr64_t        orig_s64;
-       uint64_t        deadline;
-
-       pmap_intr_assert();
-
-       if (map == PMAP_NULL || s64 == e64)
-               return;
-       PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
-                  (int) map,
-                  (int) (s64>>32), (int) s64,
-                  (int) (e64>>32), (int) e64);
-
-       PMAP_LOCK(map);
-
-#if 0
-       /*
-        * Check that address range in the kernel does not overlap the stacks.
-        * We initialize local static min/max variables once to avoid making
-        * 2 function calls for every remove. Note also that these functions
-        * both return 0 before kernel stacks have been initialized, and hence
-        * the panic is not triggered in this case.
-        */
-       if (map == kernel_pmap) {
-               static vm_offset_t      kernel_stack_min = 0;
-               static vm_offset_t      kernel_stack_max = 0;
-
-               if (kernel_stack_min == 0) {
-                       kernel_stack_min = min_valid_stack_address();
-                       kernel_stack_max = max_valid_stack_address();
-               }
-               if  ((kernel_stack_min <= s64 && s64 <  kernel_stack_max) ||
-                    (kernel_stack_min <  e64 && e64 <= kernel_stack_max))
-                       panic("pmap_remove() attempted in kernel stack");
-       }
-#else
-
-       /*
-        * The values of kernel_stack_min and kernel_stack_max are no longer
-        * relevant now that we allocate kernel stacks anywhere in the kernel map,
-        * so the old code above no longer applies.  If we wanted to check that
-        * we weren't removing a mapping of a page in a kernel stack we'd have to
-        * mark the PTE with an unused bit and check that here.
-        */
-
-#endif
-
-       deadline = rdtsc64() + max_preemption_latency_tsc;
-
-       orig_s64 = s64;
-
-       while (s64 < e64) {
-           l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size-1);
-           if (l64 > e64)
-               l64 = e64;
-           pde = pmap_pde(map, s64);
-
-           if (pde && (*pde & INTEL_PTE_VALID)) {
-               spte = (pt_entry_t *)pmap_pte(map, (s64 & ~(pde_mapped_size-1)));
-               spte = &spte[ptenum(s64)];
-               epte = &spte[intel_btop(l64-s64)];
-
-               pmap_remove_range(map, s64, spte, epte);
-           }
-           s64 = l64;
-           pde++;
-
-           if (s64 < e64 && rdtsc64() >= deadline) {
-             PMAP_UNLOCK(map)
-               PMAP_LOCK(map)
-
-             deadline = rdtsc64() + max_preemption_latency_tsc;
-           }
-
-       }
-
-       PMAP_UNLOCK(map);
-
-       PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END,
-                  (int) map, 0, 0, 0, 0);
-
-}
-
-/*
- *     Routine:        pmap_page_protect
- *
- *     Function:
- *             Lower the permission for all mappings to a given
- *             page.
- */
-void
-pmap_page_protect(
-        ppnum_t         pn,
-       vm_prot_t       prot)
-{
-       pv_hashed_entry_t               pvh_eh = PV_HASHED_ENTRY_NULL;
-       pv_hashed_entry_t               pvh_et = PV_HASHED_ENTRY_NULL;
-       pv_hashed_entry_t       nexth;
-       int                     pvh_cnt = 0;
-       pv_rooted_entry_t               pv_h;
-       pv_rooted_entry_t               pv_e;
-       pv_hashed_entry_t       pvh_e;
-       pt_entry_t              *pte;
-       int                     pai;
-       register pmap_t         pmap;
-       boolean_t               remove;
-       int                     pvhash_idx;
-
-       pmap_intr_assert();
-       assert(pn != vm_page_fictitious_addr);
-       if (pn == vm_page_guard_addr)
-               return;
-
-       pai = ppn_to_pai(pn);
-
-       if (!managed_page(pai)) {
-           /*
-            *  Not a managed page.
-            */
-           return;
-       }
-
-       PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START,
-                  (int) pn, (int) prot, 0, 0, 0);
-
-       /*
-        * Determine the new protection.
-        */
-       switch (prot) {
-           case VM_PROT_READ:
-           case VM_PROT_READ|VM_PROT_EXECUTE:
-               remove = FALSE;
-               break;
-           case VM_PROT_ALL:
-               return; /* nothing to do */
-           default:
-               remove = TRUE;
-               break;
-       }
-
-       pv_h = pai_to_pvh(pai);
-
-       LOCK_PVH(pai);
-
-
-       /*
-        * Walk down PV list, changing or removing all mappings.
-        */
-       if (pv_h->pmap != PMAP_NULL) {
-
-           pv_e = pv_h;
-           pvh_e = (pv_hashed_entry_t)pv_e; /* cheat */
-
-           do {
-               register vm_map_offset_t vaddr;
-               pmap = pv_e->pmap;
-
-               vaddr = pv_e->va;
-               pte = pmap_pte(pmap, vaddr);
-               
-               if (0 == pte) {
-                       panic("pmap_page_protect: Missing PTE, pmap: %p, pn: 0x%x vaddr: 0x%llx, prot: %d kernel_pmap: %p", pmap, pn, vaddr, prot, kernel_pmap);
-               }
-
-               nexth = (pv_hashed_entry_t)queue_next(&pvh_e->qlink);  /* if there is one */
-
-               /*
-                * Remove the mapping if new protection is NONE
-                * or if write-protecting a kernel mapping.
-                */
-               if (remove || pmap == kernel_pmap) {
-                   /*
-                    * Remove the mapping, collecting any modify bits.
-                    */
-                   pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_VALID));
-
-                   PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
-
-                   pmap_phys_attributes[pai] |= *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
-
-                   pmap_store_pte(pte, 0);
-
-#if TESTING
-                   if (pmap->stats.resident_count < 1)
-                       panic("pmap_page_protect: resident_count");
-#endif
-                   assert(pmap->stats.resident_count >= 1);
-                   OSAddAtomic(-1,  &pmap->stats.resident_count);
-
-                   /*
-                    * Deal with the pv_rooted_entry.
-                    */
-
-                   if (pv_e == pv_h) {
-                       /*
-                        * Fix up head later.
-                        */
-                       pv_h->pmap = PMAP_NULL;
-                   }
-                   else {
-                       /*
-                        * Delete this entry.
-                        */
-                     CHK_NPVHASH();
-                     pvhash_idx = pvhashidx(pvh_e->pmap,pvh_e->va);
-                     LOCK_PV_HASH(pvhash_idx);
-                     remque(&pvh_e->qlink);
-                     pmap_pvh_unlink(pvh_e);
-                     UNLOCK_PV_HASH(pvhash_idx);
-
-                     pvh_e->qlink.next = (queue_entry_t)pvh_eh;
-                       pvh_eh = pvh_e;
-
-                       if (pvh_et == PV_HASHED_ENTRY_NULL)
-                           pvh_et = pvh_e;
-                       pvh_cnt++;
-                   }
-               } else {
-                   /*
-                    * Write-protect.
-                    */
-                   pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_WRITE));
-                   PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
-               }
-
-               pvh_e = nexth;
-           } while ((pv_e = (pv_rooted_entry_t)nexth) != pv_h);
-
-
-           /*
-            * If pv_head mapping was removed, fix it up.
-            */
-
-           if (pv_h->pmap == PMAP_NULL) {
-             pvh_e = (pv_hashed_entry_t)queue_next(&pv_h->qlink);
-
-             if (pvh_e != (pv_hashed_entry_t)pv_h) {
-               CHK_NPVHASH();
-               pvhash_idx = pvhashidx(pvh_e->pmap,pvh_e->va);
-               LOCK_PV_HASH(pvhash_idx);
-               remque(&pvh_e->qlink);
-               pmap_pvh_unlink(pvh_e);
-               UNLOCK_PV_HASH(pvhash_idx);
-                 pv_h->pmap = pvh_e->pmap;
-                 pv_h->va = pvh_e->va;
-                 pvh_e->qlink.next = (queue_entry_t)pvh_eh;
-                   pvh_eh = pvh_e;
-
-                   if (pvh_et == PV_HASHED_ENTRY_NULL)
-                       pvh_et = pvh_e;
-                   pvh_cnt++;
-               }
-           }
-       }
-       if (pvh_eh != PV_HASHED_ENTRY_NULL) {
-           PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
-       }
-
-       UNLOCK_PVH(pai);
-
-       PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END,
-                  0, 0, 0, 0, 0);
-
-}
-
-
 /*
  *     Routine:
  *             pmap_disconnect
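
One detail of the pmap_remove() deleted above deserves a note: it bounds pmap-lock hold time with a TSC deadline, dropping and immediately retaking the lock between pde-sized chunks so waiters and preemption get a window during large teardowns. The pattern with stand-in lock and clock routines:

    #include <stdint.h>

    static uint64_t fake_tsc;
    static uint64_t rdtsc_sk(void)  { return ++fake_tsc; } /* stand-in for rdtsc64()         */
    static void     lock_sk(void)   { }                    /* stand-ins for PMAP_LOCK/UNLOCK */
    static void     unlock_sk(void) { }

    static void
    remove_range_sk(uint64_t s, uint64_t e, uint64_t max_latency)
    {
        uint64_t deadline = rdtsc_sk() + max_latency;

        lock_sk();
        while (s < e) {
            /* ... tear down one pde-sized chunk of mappings ... */
            s += 1;                       /* placeholder advance */
            if (s < e && rdtsc_sk() >= deadline) {
                unlock_sk();              /* give waiters a window */
                lock_sk();
                deadline = rdtsc_sk() + max_latency;
            }
        }
        unlock_sk();
    }
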
@@ -2459,427 +1714,6 @@ pmap_map_block(
 }
 
 
-/*
- *     Insert the given physical page (p) at
- *     the specified virtual address (v) in the
- *     target physical map with the protection requested.
- *
- *     If specified, the page will be wired down, meaning
- *     that the related pte cannot be reclaimed.
- *
- *     NB:  This is the only routine which MAY NOT lazy-evaluate
- *     or lose information.  That is, this routine must actually
- *     insert this page into the given map NOW.
- */
-void
-pmap_enter(
-       register pmap_t         pmap,
-       vm_map_offset_t         vaddr,
-       ppnum_t                 pn,
-       vm_prot_t               prot,
-       unsigned int            flags,
-       boolean_t               wired)
-{
-       register pt_entry_t     *pte;
-       register pv_rooted_entry_t      pv_h;
-       register int            pai;
-       pv_hashed_entry_t               pvh_e;
-       pv_hashed_entry_t               pvh_new;
-       pv_hashed_entry_t       *hashp;
-       pt_entry_t              template;
-       pmap_paddr_t            old_pa;
-       pmap_paddr_t             pa = (pmap_paddr_t)i386_ptob(pn);
-       boolean_t               need_tlbflush = FALSE;
-       boolean_t               set_NX;
-       char                    oattr;
-       int                     pvhash_idx;
-       uint32_t                pv_cnt;
-       boolean_t               old_pa_locked;
-
-       pmap_intr_assert();
-       assert(pn != vm_page_fictitious_addr);
-       if (pmap_debug)
-               printf("pmap(%qx, %x)\n", vaddr, pn);
-       if (pmap == PMAP_NULL)
-               return;
-       if (pn == vm_page_guard_addr)
-               return;
-
-       PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
-                  (int) pmap,
-                  (int) (vaddr>>32), (int) vaddr,
-                  (int) pn, prot);
-
-       if ( (prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled )
-               set_NX = FALSE;
-       else
-               set_NX = TRUE;
-       
-       /*
-        *      Must allocate a new pvlist entry while we're unlocked;
-        *      zalloc may cause pageout (which will lock the pmap system).
-        *      If we determine we need a pvlist entry, we will unlock
-        *      and allocate one.  Then we will retry, throughing away
-        *      and allocate one.  Then we will retry, throwing away
-        */
-
-       pvh_new = PV_HASHED_ENTRY_NULL;
-Retry:
-       pvh_e = PV_HASHED_ENTRY_NULL;
-
-       PMAP_LOCK(pmap);
-
-       /*
-        *      Expand pmap to include this pte.  Assume that
-        *      pmap is always expanded to include enough hardware
-        *      pages to map one VM page.
-        */
-
-       while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) {
-               /*
-                *      Must unlock to expand the pmap.
-                */
-               PMAP_UNLOCK(pmap);
-               pmap_expand(pmap, vaddr); /* going to grow pde level page(s) */
-               PMAP_LOCK(pmap);
-       }
-
-       old_pa = pte_to_pa(*pte);
-       pai = pa_index(old_pa);
-       old_pa_locked = FALSE;
-
-       /*
-        * if we have a previous managed page, lock the pv entry now. after
-        * we lock it, check to see if someone beat us to the lock and if so
-        * drop the lock
-        */
-
-       if ((0 != old_pa) && managed_page(pai)) {
-         LOCK_PVH(pai);
-         old_pa_locked = TRUE;
-         old_pa = pte_to_pa(*pte);
-         if (0 == old_pa) {
-           UNLOCK_PVH(pai);  /* some other path beat us to it */
-           old_pa_locked = FALSE;
-         }
-       }
-
-
-       /*
-        *      Special case if the incoming physical page is already mapped
-        *      at this address.
-        */
-       if (old_pa == pa) {
-
-           /*
-            *  May be changing its wired attribute or protection
-            */
-
-           template = pa_to_pte(pa) | INTEL_PTE_VALID;
-
-           if(VM_MEM_NOT_CACHEABLE == (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT))) {
-               if(!(flags & VM_MEM_GUARDED))
-                       template |= INTEL_PTE_PTA;
-               template |= INTEL_PTE_NCACHE;
-           }
-
-           if (pmap != kernel_pmap)
-               template |= INTEL_PTE_USER;
-           if (prot & VM_PROT_WRITE)
-               template |= INTEL_PTE_WRITE;
-
-           if (set_NX == TRUE)
-               template |= INTEL_PTE_NX;
-
-           if (wired) {
-               template |= INTEL_PTE_WIRED;
-               if (!iswired(*pte))
-                   OSAddAtomic(+1,  &pmap->stats.wired_count);
-           }
-           else {
-               if (iswired(*pte)) {
-                   assert(pmap->stats.wired_count >= 1);
-                   OSAddAtomic(-1,  &pmap->stats.wired_count);
-               }
-           }
-
-           /* store modified PTE and preserve RC bits */ 
-           pmap_update_pte(pte, *pte, template | (*pte & (INTEL_PTE_REF | INTEL_PTE_MOD)));
-           if (old_pa_locked) {
-             UNLOCK_PVH(pai);
-             old_pa_locked = FALSE;
-           }
-           need_tlbflush = TRUE;
-           goto Done;
-       }
-
-       /*
-        *      Outline of code from here:
-        *         1) If va was mapped, update TLBs, remove the mapping
-        *            and remove old pvlist entry.
-        *         2) Add pvlist entry for new mapping
-        *         3) Enter new mapping.
-        *
-        *      If the old physical page is not managed step 1) is skipped
-        *      (except for updating the TLBs), and the mapping is
-        *      overwritten at step 3).  If the new physical page is not
-        *      managed, step 2) is skipped.
-        */
-
-       if (old_pa != (pmap_paddr_t) 0) {
-
-           /*
-            *  Don't do anything to pages outside valid memory here.
-            *  Instead convince the code that enters a new mapping
-            *  to overwrite the old one.
-            */
-
-           /* invalidate the PTE */ 
-           pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_VALID));
-           /* propagate invalidate everywhere */
-           PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
-           /* remember reference and change */
-           oattr = (char)(*pte & (PHYS_MODIFIED | PHYS_REFERENCED));
-           /* completely invalidate the PTE */
-           pmap_store_pte(pte, 0);
-
-           if (managed_page(pai)) {
-#if TESTING
-               if (pmap->stats.resident_count < 1)
-                   panic("pmap_enter: resident_count");
-#endif
-               assert(pmap->stats.resident_count >= 1);
-               OSAddAtomic(-1,  &pmap->stats.resident_count);
-
-               if (iswired(*pte)) {
-
-#if TESTING
-                   if (pmap->stats.wired_count < 1)
-                       panic("pmap_enter: wired_count");
-#endif
-                   assert(pmap->stats.wired_count >= 1);
-                   OSAddAtomic(-1,  &pmap->stats.wired_count);
-               }
-
-               pmap_phys_attributes[pai] |= oattr;
-               /*
-                *      Remove the mapping from the pvlist for
-                *      this physical page.
-                *      We'll end up with either a rooted pv or a
-                *      hashed pv
-                */
-               {
-
-                   pv_h = pai_to_pvh(pai);
-
-                   if (pv_h->pmap == PMAP_NULL) {
-                       panic("pmap_enter: null pv_list!");
-                   }
-
-                   if (pv_h->va == vaddr && pv_h->pmap == pmap) {
-                       /*
-                        * Header is the pv_rooted_entry.  
-                        * If there is a next one, copy it to the
-                        * header and free the next one (we cannot
-                        * free the header)
-                        */
-                     pvh_e = (pv_hashed_entry_t)queue_next(&pv_h->qlink);
-                     if (pvh_e != (pv_hashed_entry_t)pv_h) {
-                       pvhash_idx = pvhashidx(pvh_e->pmap, pvh_e->va);
-                       LOCK_PV_HASH(pvhash_idx);
-                         remque(&pvh_e->qlink);
-                         pmap_pvh_unlink(pvh_e);
-                         UNLOCK_PV_HASH(pvhash_idx);
-                         pv_h->pmap = pvh_e->pmap;
-                         pv_h->va = pvh_e->va;
-                       }
-                     else {
-                       pv_h->pmap = PMAP_NULL;
-                       pvh_e = PV_HASHED_ENTRY_NULL;
-                     }
-                   }
-                   else {
-                     pv_hashed_entry_t *pprevh;
-                     ppnum_t old_ppn;
-                     /* wasn't the rooted pv - hash, find it, and unlink it */
-                     old_ppn = (ppnum_t)pa_index(old_pa);
-                     CHK_NPVHASH();
-                     pvhash_idx = pvhashidx(pmap,vaddr);
-                     LOCK_PV_HASH(pvhash_idx);
-                     pprevh = pvhash(pvhash_idx);
-#if PV_DEBUG
-                     if (NULL==pprevh)panic("pmap enter 1");
-#endif
-                     pvh_e = *pprevh;
-                     pmap_pv_hashlist_walks++;
-                     pv_cnt = 0;
-                     while (PV_HASHED_ENTRY_NULL != pvh_e) {
-                       pv_cnt++;
-                       if (pvh_e->pmap == pmap && pvh_e->va == vaddr && pvh_e->ppn == old_ppn) break;
-                       pprevh = &pvh_e->nexth;
-                       pvh_e = pvh_e->nexth;
-                     }
-                     pmap_pv_hashlist_cnts += pv_cnt;
-                     if (pmap_pv_hashlist_max < pv_cnt) pmap_pv_hashlist_max = pv_cnt;
-                     if (PV_HASHED_ENTRY_NULL == pvh_e) panic("pmap_enter: pv not in hash list");
-                     if(NULL==pprevh)panic("pmap enter 2");
-                     *pprevh = pvh_e->nexth;
-                     remque(&pvh_e->qlink);
-                     UNLOCK_PV_HASH(pvhash_idx);
-                   }
-               }
-           }
-           else {
-               /*
-                *      old_pa is not managed.
-                *      Do removal part of accounting.
-                */
-
-               if (iswired(*pte)) {
-                   assert(pmap->stats.wired_count >= 1);
-                   OSAddAtomic(-1,  &pmap->stats.wired_count);
-               }
-           }
-       }
-
-       /*
-        * if we had a previously managed page locked, unlock it now
-        */
-
-       if (old_pa_locked) {
-         UNLOCK_PVH(pai);
-         old_pa_locked = FALSE;
-       }
-
-       pai = pa_index(pa);     /* now working with new incoming phys page */
-       if (managed_page(pai)) {
-
-           /*
-            *  Step 2) Enter the mapping in the PV list for this
-            *  physical page.
-            */
-           pv_h = pai_to_pvh(pai);
-
-           LOCK_PVH(pai);
-
-           if (pv_h->pmap == PMAP_NULL) {
-               /*
-                *      No mappings yet, use  rooted pv
-                */
-               pv_h->va = vaddr;
-               pv_h->pmap = pmap;
-               queue_init(&pv_h->qlink);
-           }
-           else {
-               /*
-                *      Add new pv_hashed_entry after header.
-                */
-               if ((PV_HASHED_ENTRY_NULL == pvh_e) && pvh_new) {
-                 pvh_e = pvh_new;
-                 pvh_new = PV_HASHED_ENTRY_NULL;  /* show we used it */
-               } else if (PV_HASHED_ENTRY_NULL == pvh_e) {
-                 PV_HASHED_ALLOC(pvh_e);
-                 if (PV_HASHED_ENTRY_NULL == pvh_e) {
-                   /* the pv list is empty.
-                    * if we are on the kernel pmap we'll use one of the special private
-                    * kernel pv_e's, else, we need to unlock everything, zalloc a pv_e,
-                    * and restart bringing in the pv_e with us.
-                    */
-                   if (kernel_pmap == pmap) {
-                     PV_HASHED_KERN_ALLOC(pvh_e);
-                   } else {
-                     UNLOCK_PVH(pai);
-                     PMAP_UNLOCK(pmap);
-                     pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
-                     goto Retry;
-                   }
-                 }
-               }
-
-               if (PV_HASHED_ENTRY_NULL == pvh_e) panic("pvh_e exhaustion");
-               pvh_e->va = vaddr;
-               pvh_e->pmap = pmap;
-               pvh_e->ppn = pn;
-               CHK_NPVHASH();
-               pvhash_idx = pvhashidx(pmap,vaddr);
-               LOCK_PV_HASH(pvhash_idx);
-               insque(&pvh_e->qlink, &pv_h->qlink);
-               hashp = pvhash(pvhash_idx);
-#if PV_DEBUG
-               if(NULL==hashp)panic("pmap_enter 4");
-#endif
-               pvh_e->nexth = *hashp;
-               *hashp = pvh_e;
-               UNLOCK_PV_HASH(pvhash_idx);
-
-               /*
-                *      Remember that we used the pvlist entry.
-                */
-               pvh_e = PV_HASHED_ENTRY_NULL;
-           }
-
-           /*
-            * only count the mapping
-            * for 'managed memory'
-            */
-           OSAddAtomic(+1,  &pmap->stats.resident_count);
-           if (pmap->stats.resident_count > pmap->stats.resident_max) {
-                   pmap->stats.resident_max = pmap->stats.resident_count;
-           }
-       }
-
-       /*
-        * Step 3) Enter the mapping.
-        *
-        *      Build a template to speed up entering -
-        *      only the pfn changes.
-        */
-       template = pa_to_pte(pa) | INTEL_PTE_VALID;
-
-       if (flags & VM_MEM_NOT_CACHEABLE) {
-               if(!(flags & VM_MEM_GUARDED))
-                       template |= INTEL_PTE_PTA;
-               template |= INTEL_PTE_NCACHE;
-       }
-
-       if (pmap != kernel_pmap)
-               template |= INTEL_PTE_USER;
-       if (prot & VM_PROT_WRITE)
-               template |= INTEL_PTE_WRITE;
-
-       if (set_NX == TRUE)
-               template |= INTEL_PTE_NX;
-
-       if (wired) {
-               template |= INTEL_PTE_WIRED;
-               OSAddAtomic(+1,  &pmap->stats.wired_count);
-       }
-       pmap_store_pte(pte, template);
-
-       /* if this was a managed page we delayed unlocking the pv until here
-        * to prevent pmap_page_protect et al from finding it until the pte
-        * has been stored */
-
-       if (managed_page(pai)) {
-         UNLOCK_PVH(pai);
-       }
-
-Done:
-       if (need_tlbflush == TRUE)
-               PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
-
-       if (pvh_e != PV_HASHED_ENTRY_NULL) {
-               PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1);
-       }
-
-       if (pvh_new != PV_HASHED_ENTRY_NULL) {
-         PV_HASHED_KERN_FREE_LIST(pvh_new, pvh_new, 1);
-       }
-
-       PMAP_UNLOCK(pmap);
-       PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, 0, 0, 0, 0, 0);
-}
-
 /*
  *     Routine:        pmap_change_wiring
  *     Function:       Change the wiring attribute for a map/virtual-address
@@ -3917,95 +2751,6 @@ phys_page_exists(
        return TRUE;
 }
 
-void
-mapping_free_prime(void)
-{
-       int             i;
-       pv_hashed_entry_t      pvh_e;
-       pv_hashed_entry_t      pvh_eh;
-       pv_hashed_entry_t      pvh_et;
-       int             pv_cnt;
-
-       pv_cnt = 0;
-       pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
-       for (i = 0; i < (5 * PV_HASHED_ALLOC_CHUNK); i++) {
-               pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
-
-               pvh_e->qlink.next = (queue_entry_t)pvh_eh;
-               pvh_eh = pvh_e;
-
-               if (pvh_et == PV_HASHED_ENTRY_NULL)
-                       pvh_et = pvh_e;
-               pv_cnt++;
-       }
-       PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
-
-       pv_cnt = 0;
-       pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
-       for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK; i++) {
-               pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
-
-               pvh_e->qlink.next = (queue_entry_t)pvh_eh;
-               pvh_eh = pvh_e;
-
-               if (pvh_et == PV_HASHED_ENTRY_NULL)
-                       pvh_et = pvh_e;
-               pv_cnt++;
-       }
-       PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
-
-}
-
-void
-mapping_adjust(void)
-{
-       pv_hashed_entry_t      pvh_e;
-       pv_hashed_entry_t      pvh_eh;
-       pv_hashed_entry_t      pvh_et;
-       int             pv_cnt;
-       int             i;
-
-       if (mapping_adjust_call == NULL) {
-               thread_call_setup(&mapping_adjust_call_data,
-                                 (thread_call_func_t) mapping_adjust,
-                                 (thread_call_param_t) NULL);
-               mapping_adjust_call = &mapping_adjust_call_data;
-       }
-
-       pv_cnt = 0;
-       pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
-       if (pv_hashed_kern_free_count < PV_HASHED_KERN_LOW_WATER_MARK) {
-               for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK; i++) {
-                       pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
-
-                       pvh_e->qlink.next = (queue_entry_t)pvh_eh;
-                       pvh_eh = pvh_e;
-
-                       if (pvh_et == PV_HASHED_ENTRY_NULL)
-                               pvh_et = pvh_e;
-                       pv_cnt++;
-               }
-               PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
-       }
-
-       pv_cnt = 0;
-       pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
-       if (pv_hashed_free_count < PV_HASHED_LOW_WATER_MARK) {
-               for (i = 0; i < PV_HASHED_ALLOC_CHUNK; i++) {
-                       pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
-
-                       pvh_e->qlink.next = (queue_entry_t)pvh_eh;
-                       pvh_eh = pvh_e;
-
-                       if (pvh_et == PV_HASHED_ENTRY_NULL)
-                               pvh_et = pvh_e;
-                       pv_cnt++;
-               }
-               PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
-       }
-       mappingrecurse = 0;
-}
-
 void
 pmap_commpage32_init(vm_offset_t kernel_commpage, vm_offset_t user_commpage, int cnt)
 {
index 9e6d65d20b9e12c5091b0717bf1b46f336cc8297..0acf265d2ab0a2b24893f64092c9ff073dd744d8 100644 (file)
@@ -432,7 +432,8 @@ enum  high_fixed_addresses {
 #define INTEL_PTE_NX           (1ULL << 63)
 
 #define INTEL_PTE_INVALID       0
-
+/* This is conservative, but suffices */
+#define INTEL_PTE_RSVD         ((1ULL << 8) | (1ULL << 9) | (1ULL << 10) | (1ULL << 11) | (0x1FFULL << 54))
 #define        pa_to_pte(a)            ((a) & INTEL_PTE_PFN) /* XXX */
 #define        pte_to_pa(p)            ((p) & INTEL_PTE_PFN) /* XXX */
 #define        pte_increment_pa(p)     ((p) += INTEL_OFFMASK+1)
@@ -704,7 +705,7 @@ extern vm_offset_t pmap_high_shared_remap(enum high_fixed_addresses, vm_offset_t
 #endif
 
 extern void pt_fake_zone_info(int *, vm_size_t *, vm_size_t *, vm_size_t *, vm_size_t *, int *, int *);
-
+extern void pmap_pagetable_corruption_msg_log(int (*)(const char * fmt, ...)__printflike(1,2));
 
 
 /*
index 1a1105399083b9351b3c791a8b04530c476d9146..04f4aa0081c1d721eb3e4797d7951b74e54dc4df 100644 (file)
@@ -28,6 +28,7 @@
 
 #include <vm/pmap.h>
 #include <sys/kdebug.h>
+#include <kern/debug.h>
 
 #ifdef MACH_KERNEL_PRIVATE
 
@@ -43,7 +44,6 @@
        simple_unlock(&(pmap)->lock);           \
 }
 
-extern void pmap_flush_tlbs(pmap_t pmap);
 
 #define PMAP_UPDATE_TLBS(pmap, s, e)                                   \
        pmap_flush_tlbs(pmap)
@@ -67,10 +67,698 @@ void               pmap_expand_pml4(
 void           pmap_expand_pdpt(
                        pmap_t          map,
                        vm_map_offset_t v);
+extern void    pmap_flush_tlbs(pmap_t pmap);
+
 #if    defined(__x86_64__)
 extern const boolean_t cpu_64bit;
 #else
 extern boolean_t cpu_64bit;
 #endif
 
+/*
+ *     Private data structures.
+ */
+
+/*
+ *     For each vm_page_t, there is a list of all currently
+ *     valid virtual mappings of that page.  An entry is
+ *     a pv_rooted_entry_t; the list is the pv_table.
+ *
+ *      N.B.  with the new combo rooted/hashed scheme it is
+ *      only possible to remove individual non-rooted entries
+ *      if they are found via the hashed chains as there is no
+ *      way to unlink the singly linked hashed entries if navigated to
+ *      via the queue list off the rooted entries.  Think of it as
+ *      hash/walk/pull, keeping track of the prev pointer while walking
+ *      the singly linked hash list.  All of this is to save memory and
+ *      keep both types of pv_entries as small as possible.
+ */
+
+/*
+
+PV HASHING Changes - JK 1/2007
+
+Pve's establish physical to virtual mappings.  These are used for aliasing of a 
+physical page to (potentially many) virtual addresses within pmaps. In the previous 
+implementation the structure of the pv_entries (each 16 bytes in size) was
+
+typedef struct pv_entry {
+    struct pv_entry      *next;
+    pmap_t                    pmap;
+    vm_map_offset_t   va;
+} *pv_entry_t;
+
+An initial array of these is created at boot time, one per physical page of memory, 
+indexed by the physical page number. Additionally, a pool of entries is created from a 
+pv_zone to be used as needed by pmap_enter() when it is creating new mappings.  
+Originally, we kept this pool around because the code in pmap_enter() was unable to 
+block if it needed an entry and none were available - we'd panic.  Some time ago I 
+restructured the pmap_enter() code so that for user pmaps it can block while zalloc'ing 
+a pv structure and restart, removing a panic from the code (in the case of the kernel 
+pmap we cannot block and still panic, so, we keep a separate hot pool for use only on 
+kernel pmaps).  The pool has not been removed since there is a large performance gain 
+keeping freed pv's around for reuse and not suffering the overhead of zalloc for every new pv we need.
+
+As pmap_enter() created new mappings it linked the new pve's for them off the fixed 
+pv array for that ppn (off the next pointer).  These pve's are accessed for several 
+operations, one of them being address space teardown.  In that case, we basically do this
+
+       for (every page/pte in the space) {
+               calc pve_ptr from the ppn in the pte
+               for (every pv in the list for the ppn) {
+                       if (this pv is for this pmap/vaddr) {
+                               do housekeeping
+                               unlink/free the pv
+                       }
+               }
+       }
+
+The problem arose when we were running, say 8000 (or even 2000) apache or other processes 
+and one or all terminate. The list hanging off each pv array entry could have thousands of 
+entries.  We were continuously linearly searching each of these lists as we stepped through 
+the address space we were tearing down.  Because of the locks we hold, likely taking a cache 
+miss for each node,  and interrupt disabling for MP issues the system became completely 
+unresponsive for many seconds while we did this.
+
+Realizing that pve's are accessed in two distinct ways (linearly running the list by ppn 
+for operations like pmap_page_protect and finding and modifying/removing a single pve as 
+part of pmap_enter processing) has led to modifying the pve structures and databases.
+
+There are now two types of pve structures.  A "rooted" structure which is basically the 
+original structure accessed in an array by ppn, and a "hashed" structure accessed on a 
+hash list via a hash of [pmap, vaddr].  These have been designed with the two goals of 
+minimizing wired memory and making the lookup of a ppn faster.  Since a vast majority of 
+pages in the system are not aliased and hence represented by a single pv entry I've kept 
+the rooted entry size as small as possible because there is one of these dedicated for 
+every physical page of memory.  The hashed pve's are larger due to the addition of the hash 
+link and the ppn entry needed for matching while running the hash list to find the entry we 
+are looking for.  This way, only systems that have lots of aliasing (like 2000+ httpd procs) 
+will pay the extra memory price. Both structures have the same first three fields allowing 
+some simplification in the code.
+
+They have these shapes
+
+typedef struct pv_rooted_entry {
+        queue_head_t qlink;
+        vm_map_offset_t va;
+        pmap_t          pmap;
+} *pv_rooted_entry_t;
+
+
+typedef struct pv_hashed_entry {
+  queue_head_t qlink;
+  vm_map_offset_t va;
+  pmap_t        pmap;
+  ppnum_t ppn;
+  struct pv_hashed_entry *nexth;
+} *pv_hashed_entry_t;
+
+The main flow difference is that the code is now aware of the rooted entry and the hashed 
+entries.  Code that runs the pv list still starts with the rooted entry and then continues 
+down the qlink onto the hashed entries.  Code that is looking up a specific pv entry first 
+checks the rooted entry and then hashes and runs the hash list for the match. The hash list 
+lengths are much smaller than the original pv lists that contained all aliases for the specific ppn.
+
+*/
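
To make the two access paths concrete, here is a minimal user-space model of the lookup flow described above; every type, table, and helper in it is a simplified stand-in for the kernel structures defined just below, not the real definitions:

    #include <stdint.h>
    #include <stddef.h>

    /* simplified stand-ins for pv_rooted_entry / pv_hashed_entry */
    struct pv_hashed { uintptr_t pmap; uint64_t va; uint32_t ppn; struct pv_hashed *nexth; };
    struct pv_rooted { uintptr_t pmap; uint64_t va; };

    #define NPVHASH_MODEL 4095                 /* 2^n - 1 so '&' acts as modulo */
    static struct pv_hashed *hash_table[NPVHASH_MODEL + 1];
    static struct pv_rooted  head_table[1024]; /* one rooted entry per physical page */

    static uint32_t hashidx(uintptr_t pmap, uint64_t va) {
            return ((uint32_t)pmap ^ (uint32_t)(va >> 12)) & NPVHASH_MODEL;
    }

    /* lookup: check the rooted entry for the ppn first, then run the hash chain */
    static int pv_mapping_exists(uint32_t ppn, uintptr_t pmap, uint64_t va) {
            struct pv_hashed *e;
            if (head_table[ppn].pmap == pmap && head_table[ppn].va == va)
                    return 1;                          /* hit in the rooted entry */
            for (e = hash_table[hashidx(pmap, va)]; e != NULL; e = e->nexth)
                    if (e->pmap == pmap && e->va == va && e->ppn == ppn)
                            return 1;                  /* hit on the hash chain */
            return 0;
    }
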
+
+typedef struct pv_rooted_entry {     /* first three entries must match pv_hashed_entry_t */
+        queue_head_t qlink;
+       vm_map_offset_t va;             /* virtual address for mapping */
+       pmap_t          pmap;           /* pmap where mapping lies */
+} *pv_rooted_entry_t;
+
+#define PV_ROOTED_ENTRY_NULL   ((pv_rooted_entry_t) 0)
+
+
+typedef struct pv_hashed_entry {     /* first three entries must match pv_rooted_entry_t */
+       queue_head_t qlink;
+       vm_map_offset_t va;
+       pmap_t        pmap;
+       ppnum_t ppn;
+       struct pv_hashed_entry *nexth;
+} *pv_hashed_entry_t;
+
+#define PV_HASHED_ENTRY_NULL ((pv_hashed_entry_t)0)
+
+/* #define PV_DEBUG 1   uncomment to enable some PV debugging code */
+#ifdef PV_DEBUG
+#define CHK_NPVHASH() if(0 == npvhash) panic("npvhash uninitialized");
+#else
+#define CHK_NPVHASH()
+#endif
+
+#define NPVHASH 4095   /* MUST BE 2^N - 1 */
+#define PV_HASHED_LOW_WATER_MARK 5000
+#define PV_HASHED_KERN_LOW_WATER_MARK 400
+#define PV_HASHED_ALLOC_CHUNK 2000
+#define PV_HASHED_KERN_ALLOC_CHUNK 200
+
+#define        PV_HASHED_ALLOC(pvh_e) { \
+       simple_lock(&pv_hashed_free_list_lock); \
+       if ((pvh_e = pv_hashed_free_list) != 0) { \
+         pv_hashed_free_list = (pv_hashed_entry_t)pvh_e->qlink.next;   \
+         pv_hashed_free_count--;                                       \
+         if (pv_hashed_free_count < PV_HASHED_LOW_WATER_MARK)          \
+                 if (hw_compare_and_store(0,1,(u_int *)&mappingrecurse)) \
+                         thread_call_enter(mapping_adjust_call);       \
+       }                                                               \
+       simple_unlock(&pv_hashed_free_list_lock); \
+}
+
+#define        PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt) {   \
+       simple_lock(&pv_hashed_free_list_lock); \
+       pvh_et->qlink.next = (queue_entry_t)pv_hashed_free_list;        \
+       pv_hashed_free_list = pvh_eh; \
+       pv_hashed_free_count += pv_cnt;           \
+       simple_unlock(&pv_hashed_free_list_lock); \
+}
+
+#define        PV_HASHED_KERN_ALLOC(pvh_e) { \
+       simple_lock(&pv_hashed_kern_free_list_lock); \
+       if ((pvh_e = pv_hashed_kern_free_list) != 0) { \
+         pv_hashed_kern_free_list = (pv_hashed_entry_t)pvh_e->qlink.next;      \
+         pv_hashed_kern_free_count--;                                  \
+         if (pv_hashed_kern_free_count < PV_HASHED_KERN_LOW_WATER_MARK) \
+                 if (hw_compare_and_store(0,1,(u_int *)&mappingrecurse)) \
+                         thread_call_enter(mapping_adjust_call);       \
+       }                                                               \
+       simple_unlock(&pv_hashed_kern_free_list_lock); \
+}
+
+#define        PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt) {       \
+       simple_lock(&pv_hashed_kern_free_list_lock); \
+       pvh_et->qlink.next = (queue_entry_t)pv_hashed_kern_free_list;   \
+       pv_hashed_kern_free_list = pvh_eh; \
+       pv_hashed_kern_free_count += pv_cnt;           \
+       simple_unlock(&pv_hashed_kern_free_list_lock); \
+}
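
Note that both free-list macros take a chain head, a chain tail, and a count, so a whole batch of entries can be spliced onto the global list under a single lock acquisition. The replenish pattern (the same loop shape as the mapping_adjust()/mapping_free_prime() code removed earlier in this diff) looks like:

    pv_hashed_entry_t pvh_e, pvh_eh = PV_HASHED_ENTRY_NULL, pvh_et = PV_HASHED_ENTRY_NULL;
    int pv_cnt = 0, i;

    for (i = 0; i < PV_HASHED_ALLOC_CHUNK; i++) {
            pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
            pvh_e->qlink.next = (queue_entry_t) pvh_eh;   /* push onto the local chain */
            pvh_eh = pvh_e;
            if (pvh_et == PV_HASHED_ENTRY_NULL)
                    pvh_et = pvh_e;                       /* first entry becomes the tail */
            pv_cnt++;
    }
    /* one lock round trip splices the whole chain onto the global free list */
    PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
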
+
+/*
+ *     Index into pv_head table, its lock bits, and the modify/reference and managed bits
+ */
+
+#define pa_index(pa)           (i386_btop(pa))
+#define ppn_to_pai(ppn)                ((int)ppn)
+
+#define pai_to_pvh(pai)                (&pv_head_table[pai])
+#define lock_pvh_pai(pai)      bit_lock(pai, (void *)pv_lock_table)
+#define unlock_pvh_pai(pai)    bit_unlock(pai, (void *)pv_lock_table)
+#define pvhash(idx)            (&pv_hash_table[idx])
+
+#define lock_hash_hash(hash)   bit_lock(hash, (void *)pv_hash_lock_table)
+#define unlock_hash_hash(hash) bit_unlock(hash, (void *)pv_hash_lock_table)
+
+#define IS_MANAGED_PAGE(x)                             \
+       ((unsigned int)(x) <= last_managed_page &&      \
+        (pmap_phys_attributes[x] & PHYS_MANAGED))
+
+/*
+ *     Physical page attributes.  Copy bits from PTE definition.
+ */
+#define        PHYS_MODIFIED   INTEL_PTE_MOD   /* page modified */
+#define        PHYS_REFERENCED INTEL_PTE_REF   /* page referenced */
+#define PHYS_MANAGED   INTEL_PTE_VALID /* page is managed */
+
+/*
+ *     Amount of virtual memory mapped by one
+ *     page-directory entry.
+ */
+#define        PDE_MAPPED_SIZE         (pdetova(1))
+
+
+/*
+ *     Locking and TLB invalidation
+ */
+
+/*
+ *     Locking Protocols: (changed 2/2007 JK)
+ *
+ *     There are two structures in the pmap module that need locking:
+ *     the pmaps themselves, and the per-page pv_lists (which are locked
+ *     by locking the pv_lock_table entry that corresponds to the pv_head
+ *     for the list in question.)  Most routines want to lock a pmap and
+ *     then do operations in it that require pv_list locking -- however
+ *     pmap_remove_all and pmap_copy_on_write operate on a physical page
+ *     basis and want to do the locking in the reverse order, i.e. lock
+ *     a pv_list and then go through all the pmaps referenced by that list.
+ *
+ *      The system wide pmap lock has been removed. Now, paths take a lock
+ *      on the pmap before changing its 'shape' and the reverse order lockers
+ *      (coming in by phys ppn) take a lock on the corresponding pv and then
+ *      retest to be sure nothing changed during the window before they locked
+ *      and can then run up/down the pv lists holding the list lock. This also
+ *      lets the pmap layer run (nearly completely) interrupt enabled, unlike
+ *      previously.
+ */
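
The retest the comment refers to is visible in the bulk-removal path later in this diff; condensed, the reverse-order locker's idiom is:

    pa = pte_to_pa(*cpte);             /* unlocked peek at the PTE */
    if (pa == 0)
            return;                    /* nothing mapped */
    pai = pa_index(pa);
    LOCK_PVH(pai);
    pa = pte_to_pa(*cpte);             /* retest now that the pv lock is held */
    if (pa == 0) {
            UNLOCK_PVH(pai);           /* the pmap-side path beat us to it */
            return;
    }
    /* ... safe to run up/down the pv list for pai here ... */
    UNLOCK_PVH(pai);
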
+
+/*
+ * PV locking
+ */
+
+#define LOCK_PVH(index)        {               \
+       mp_disable_preemption();        \
+       lock_pvh_pai(index);            \
+}
+
+#define UNLOCK_PVH(index) {            \
+       unlock_pvh_pai(index);          \
+       mp_enable_preemption();         \
+}
+/*
+ * PV hash locking
+ */
+
+#define LOCK_PV_HASH(hash)         lock_hash_hash(hash)
+#define UNLOCK_PV_HASH(hash)       unlock_hash_hash(hash)
+extern uint32_t npvhash;
+extern pv_hashed_entry_t       *pv_hash_table;  /* hash lists */
+extern pv_hashed_entry_t       pv_hashed_free_list;
+extern pv_hashed_entry_t       pv_hashed_kern_free_list;
+decl_simple_lock_data(extern, pv_hashed_free_list_lock)
+decl_simple_lock_data(extern, pv_hashed_kern_free_list_lock)
+decl_simple_lock_data(extern, pv_hash_table_lock)
+
+extern zone_t          pv_hashed_list_zone;    /* zone of pv_hashed_entry structures */
+
+extern int                     pv_hashed_free_count;
+extern int                     pv_hashed_kern_free_count;
+#define pv_lock_table_size(n)  (((n)+BYTE_SIZE-1)/BYTE_SIZE)
+#define pv_hash_lock_table_size(n)  (((n)+BYTE_SIZE-1)/BYTE_SIZE)
+extern char    *pv_lock_table;         /* pointer to array of bits */
+
+extern char    *pv_hash_lock_table;
+extern pv_rooted_entry_t       pv_head_table;          /* array of entries, one
+                                                        * per page */
+extern uint64_t pde_mapped_size;
+
+extern char            *pmap_phys_attributes;
+extern unsigned int    last_managed_page;
+
+/*
+ * when spinning through pmap_remove
+ * ensure that we don't spend too much
+ * time with preemption disabled.
+ * I'm setting the current threshold
+ * to 20us
+ */
+#define MAX_PREEMPTION_LATENCY_NS 20000
+extern uint64_t max_preemption_latency_tsc;
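
The TSC deadline derived from this threshold is consumed by pmap_remove() below; the conversion itself happens once at pmap init (not part of this hunk) and, assuming the rtclock conversion helpers tmrCvt() and tscFCvtn2t, looks roughly like:

    /* assumed init-time conversion: turn the 20us budget into TSC ticks so the
     * rdtsc64() deadline checks in pmap_remove() are a single compare */
    max_preemption_latency_tsc = tmrCvt((uint64_t)MAX_PREEMPTION_LATENCY_NS, tscFCvtn2t);
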
+
+/* #define DEBUGINTERRUPTS 1  uncomment to ensure pmap callers have interrupts enabled */
+#ifdef DEBUGINTERRUPTS
+#define pmap_intr_assert() {                                                   \
+       if (processor_avail_count > 1 && !ml_get_interrupts_enabled())          \
+               panic("pmap interrupt assert %s, %d",__FILE__, __LINE__);       \
+}
+#else
+#define pmap_intr_assert()
+#endif
+
+extern int             nx_enabled;
+extern unsigned int    inuse_ptepages_count;
+
+static inline uint32_t
+pvhashidx(pmap_t pmap, vm_map_offset_t va)
+{
+       return ((uint32_t)(uintptr_t)pmap ^
+               ((uint32_t)((uint64_t)va >> PAGE_SHIFT) & 0xFFFFFFFF)) &
+              npvhash;
+}
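
Since npvhash is of the form 2^n - 1 (see the NPVHASH comment above), the trailing '& npvhash' reduces the mixed value modulo the table size without a divide:

    /* npvhash must be 2^n - 1 for the mask to act as a modulo */
    assert((npvhash & (npvhash + 1)) == 0);
    /* e.g. with npvhash == 4095 (0xFFF): 0x12345 & 0xFFF == 0x345,
     * the same as 0x12345 % 4096, with no division */
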
+
+/*
+ * unlinks the pv_hashed_entry_t pvh from the singly linked hash chain.
+ * properly deals with the anchor.
+ * must be called with the hash locked, does not unlock it
+ */
+
+static inline void 
+pmap_pvh_unlink(pv_hashed_entry_t pvh)
+{
+       pv_hashed_entry_t       curh;
+       pv_hashed_entry_t       *pprevh;
+       int                     pvhash_idx;
+
+       CHK_NPVHASH();
+       pvhash_idx = pvhashidx(pvh->pmap, pvh->va);
+
+       pprevh = pvhash(pvhash_idx);
+
+#if PV_DEBUG
+       if (NULL == *pprevh)
+               panic("pvh_unlink null anchor"); /* JK DEBUG */
+#endif
+       curh = *pprevh;
+
+       while (PV_HASHED_ENTRY_NULL != curh) {
+               if (pvh == curh)
+                       break;
+               pprevh = &curh->nexth;
+               curh = curh->nexth;
+       }
+       if (PV_HASHED_ENTRY_NULL == curh) panic("pmap_pvh_unlink no pvh");
+       *pprevh = pvh->nexth;
+       return;
+}
+
+static inline void
+pv_hash_add(pv_hashed_entry_t  pvh_e,
+           pv_rooted_entry_t   pv_h)
+{
+       pv_hashed_entry_t       *hashp;
+       int                     pvhash_idx;
+
+       CHK_NPVHASH();
+       pvhash_idx = pvhashidx(pvh_e->pmap, pvh_e->va);
+       LOCK_PV_HASH(pvhash_idx);
+       insque(&pvh_e->qlink, &pv_h->qlink);
+       hashp = pvhash(pvhash_idx);
+#if PV_DEBUG
+       if (NULL==hashp)
+               panic("pv_hash_add(%p) null hash bucket", pvh_e);
+#endif
+       pvh_e->nexth = *hashp;
+       *hashp = pvh_e;
+       UNLOCK_PV_HASH(pvhash_idx);
+}
+
+static inline void
+pv_hash_remove(pv_hashed_entry_t pvh_e)
+{
+       int                     pvhash_idx;
+
+       CHK_NPVHASH();
+       pvhash_idx = pvhashidx(pvh_e->pmap,pvh_e->va);
+       LOCK_PV_HASH(pvhash_idx);
+       remque(&pvh_e->qlink);
+       pmap_pvh_unlink(pvh_e);
+       UNLOCK_PV_HASH(pvhash_idx);
+}
+
+static inline boolean_t popcnt1(uint64_t distance) {
+       return ((distance & (distance - 1)) == 0);
+}
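
popcnt1() is true when its argument has at most one bit set, so popcnt1(a ^ b) holds exactly when a and b are at Hamming distance 0 or 1; this is what the bit-flip recovery below keys off. A few concrete cases:

    popcnt1(0x00);   /* TRUE:  values identical (distance 0)  */
    popcnt1(0x40);   /* TRUE:  a single-bit flip (distance 1) */
    popcnt1(0x41);   /* FALSE: two bits differ                */
    /* used below as, e.g., popcnt1((uintptr_t)pv_e->pmap ^ (uintptr_t)pmap)
     * to spot a pv entry whose pmap pointer took a one-bit hit */
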
+
+/*
+ * Routines to handle suppression of/recovery from some forms of pagetable corruption
+ * incidents observed in the field. These can be either software induced (wild
+ * stores to the mapwindows where applicable, use-after-free errors
+ * (typically of pages addressed physically), mis-directed DMAs, etc.) or due
+ * to DRAM/memory hierarchy/interconnect errors. Given the theoretical rarity of these errors,
+ * the recording mechanism is deliberately not MP-safe. The overarching goal is to
+ * still assert on potential software races, but attempt recovery from incidents
+ * identifiable as occurring due to issues beyond the control of the pmap module.
+ * The latter includes single-bit errors and malformed pagetable entries.
+ * We currently limit ourselves to recovery/suppression of one incident per
+ * PMAP_PAGETABLE_CORRUPTION_INTERVAL seconds, and details of the incident
+ * are logged.
+ * Assertions are not suppressed if kernel debugging is enabled. (DRK 09)
+ */
+
+typedef enum {
+       PTE_VALID               = 0x0,
+       PTE_INVALID             = 0x1,
+       PTE_RSVD                = 0x2,
+       PTE_SUPERVISOR          = 0x4,
+       PTE_BITFLIP             = 0x8,
+       PV_BITFLIP              = 0x10,
+       PTE_INVALID_CACHEABILITY = 0x20
+} pmap_pagetable_corruption_t;
+
+typedef enum {
+       ROOT_PRESENT = 0,
+       ROOT_ABSENT = 1
+} pmap_pv_assertion_t;
+
+typedef enum {
+       PMAP_ACTION_IGNORE      = 0x0,
+       PMAP_ACTION_ASSERT      = 0x1,
+       PMAP_ACTION_RETRY       = 0x2,
+       PMAP_ACTION_RETRY_RELOCK = 0x4
+} pmap_pagetable_corruption_action_t;
+
+#define        PMAP_PAGETABLE_CORRUPTION_INTERVAL (6ULL * 3600ULL)
+extern uint64_t pmap_pagetable_corruption_interval_abstime;
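
The interval is specified in seconds but compared against mach_absolute_time() deltas, so it has to be converted to abstime units once at init; a sketch of that conversion, assuming the standard nanoseconds_to_absolutetime() helper (the init site is not part of this hunk):

    nanoseconds_to_absolutetime(PMAP_PAGETABLE_CORRUPTION_INTERVAL * NSEC_PER_SEC,
                                &pmap_pagetable_corruption_interval_abstime);
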
+
+extern uint32_t pmap_pagetable_corruption_incidents;
+#define PMAP_PAGETABLE_CORRUPTION_MAX_LOG (8)
+typedef struct {
+       pmap_pv_assertion_t incident;
+       pmap_pagetable_corruption_t reason;
+       pmap_pagetable_corruption_action_t action;
+       pmap_t  pmap;
+       vm_map_offset_t vaddr;
+       pt_entry_t pte;
+       ppnum_t ppn;
+       pmap_t pvpmap;
+       vm_map_offset_t pvva;
+       uint64_t abstime;
+} pmap_pagetable_corruption_record_t;
+
+extern pmap_pagetable_corruption_record_t pmap_pagetable_corruption_records[];
+extern uint64_t pmap_pagetable_corruption_last_abstime;
+extern thread_call_t   pmap_pagetable_corruption_log_call;
+extern boolean_t pmap_pagetable_corruption_timeout;
+
+static inline void
+pmap_pagetable_corruption_log(pmap_pv_assertion_t incident, pmap_pagetable_corruption_t suppress_reason, pmap_pagetable_corruption_action_t action, pmap_t pmap, vm_map_offset_t vaddr, pt_entry_t *ptep, ppnum_t ppn, pmap_t pvpmap, vm_map_offset_t pvva) {
+       uint32_t pmap_pagetable_corruption_log_index;
+       pmap_pagetable_corruption_log_index = pmap_pagetable_corruption_incidents++ % PMAP_PAGETABLE_CORRUPTION_MAX_LOG;
+       pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].incident = incident;
+       pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].reason = suppress_reason;
+       pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].action = action;
+       pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pmap = pmap;
+       pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].vaddr = vaddr;
+       pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pte = *ptep;
+       pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].ppn = ppn;
+       pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pvpmap = pvpmap;
+       pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pvva = pvva;
+       pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].abstime = mach_absolute_time();
+       /* Asynchronously log */
+       thread_call_enter(pmap_pagetable_corruption_log_call);
+}
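
The records form a fixed eight-slot ring (the incident counter is reduced modulo PMAP_PAGETABLE_CORRUPTION_MAX_LOG), so a consumer only ever walks min(incidents, 8) slots. A hypothetical reader in the shape of the pmap_pagetable_corruption_msg_log() hook declared earlier in this diff (dump_corruption_log and prnt are illustrative names, not kernel symbols):

    static void
    dump_corruption_log(int (*prnt)(const char *fmt, ...))
    {
            uint32_t n = pmap_pagetable_corruption_incidents;
            uint32_t count = (n > PMAP_PAGETABLE_CORRUPTION_MAX_LOG) ?
                PMAP_PAGETABLE_CORRUPTION_MAX_LOG : n;
            uint32_t i;

            for (i = 0; i < count; i++) {
                    pmap_pagetable_corruption_record_t *r =
                        &pmap_pagetable_corruption_records[i];
                    prnt("incident %u: reason 0x%x action 0x%x pmap %p va 0x%llx\n",
                        i, (unsigned) r->reason, (unsigned) r->action,
                        (void *) r->pmap, (unsigned long long) r->vaddr);
            }
    }
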
+
+static inline pmap_pagetable_corruption_action_t
+pmap_classify_pagetable_corruption(pmap_t pmap, vm_map_offset_t vaddr, ppnum_t *ppnp, pt_entry_t *ptep, pmap_pv_assertion_t incident) {
+	pmap_pagetable_corruption_action_t	action = PMAP_ACTION_ASSERT;
+       pmap_pagetable_corruption_t     suppress_reason = PTE_VALID;
+       ppnum_t                 suppress_ppn = 0;
+       pt_entry_t cpte = *ptep;
+       ppnum_t cpn = pa_index(pte_to_pa(cpte));
+       ppnum_t ppn = *ppnp;
+       pv_rooted_entry_t       pv_h = pai_to_pvh(ppn_to_pai(ppn));
+       pv_rooted_entry_t       pv_e = pv_h;
+       uint32_t        bitdex;
+       pmap_t pvpmap = pv_h->pmap;
+       vm_map_offset_t pvva = pv_h->va;
+       boolean_t ppcd = FALSE;
+
+       /* Ideally, we'd consult the Mach VM here to definitively determine
+        * the nature of the mapping for this address space and address.
+        * As that would be a layering violation in this context, we
+        * use various heuristics to recover from single bit errors,
+        * malformed pagetable entries etc. These are not intended
+        * to be comprehensive.
+        */
+
+       /* As a precautionary measure, mark A+D */
+       pmap_phys_attributes[ppn_to_pai(ppn)] |= (PHYS_MODIFIED | PHYS_REFERENCED);
+
+       /*
+        * Correct potential single bit errors in either (but not both) element
+        * of the PV
+        */
+       do {
+               if ((popcnt1((uintptr_t)pv_e->pmap ^ (uintptr_t)pmap) && pv_e->va == vaddr) ||
+                   (pv_e->pmap == pmap && popcnt1(pv_e->va ^ vaddr))) {
+                       pv_e->pmap = pmap;
+                       pv_e->va = vaddr;
+                       suppress_reason = PV_BITFLIP;
+                       action = PMAP_ACTION_RETRY;
+                       goto pmap_cpc_exit;
+               }
+       } while((pv_e = (pv_rooted_entry_t) queue_next(&pv_e->qlink)) != pv_h);
+
+       /* Discover root entries with a Hamming
+        * distance of 1 from the supplied
+        * physical page frame.
+        */
+       for (bitdex = 0; bitdex < (sizeof(ppnum_t) << 3); bitdex++) {
+               ppnum_t npn = cpn ^ (ppnum_t) (1ULL << bitdex);
+               if (IS_MANAGED_PAGE(npn)) {
+                       pv_rooted_entry_t npv_h = pai_to_pvh(ppn_to_pai(npn));
+                       if (npv_h->va == vaddr && npv_h->pmap == pmap) {
+                               suppress_reason = PTE_BITFLIP;
+                               suppress_ppn = npn;
+                               action = PMAP_ACTION_RETRY_RELOCK;
+                               UNLOCK_PVH(ppn_to_pai(ppn));
+                               *ppnp = npn;
+                               goto pmap_cpc_exit;
+                       }
+               }
+       }
+
+       if (pmap == kernel_pmap) {
+               action = PMAP_ACTION_ASSERT;
+               goto pmap_cpc_exit;
+       }
+
+       /* Check for malformed/inconsistent entries */
+
+	if ((cpte & (INTEL_PTE_NCACHE | INTEL_PTE_WTHRU | INTEL_PTE_PTA)) == (INTEL_PTE_NCACHE | INTEL_PTE_WTHRU)) {
+		action = PMAP_ACTION_IGNORE;
+		suppress_reason = PTE_INVALID_CACHEABILITY;
+	} else if (cpte & INTEL_PTE_RSVD) {
+		action = PMAP_ACTION_IGNORE;
+		suppress_reason = PTE_RSVD;
+	} else if ((pmap != kernel_pmap) && ((cpte & INTEL_PTE_USER) == 0)) {
+		action = PMAP_ACTION_IGNORE;
+		suppress_reason = PTE_SUPERVISOR;
+	}
+pmap_cpc_exit:
+       PE_parse_boot_argn("-pmap_pagetable_corruption_deassert", &ppcd, sizeof(ppcd));
+
+       if (debug_boot_arg && !ppcd) {
+               action = PMAP_ACTION_ASSERT;
+       }
+
+       if ((mach_absolute_time() - pmap_pagetable_corruption_last_abstime) < pmap_pagetable_corruption_interval_abstime) {
+               action = PMAP_ACTION_ASSERT;
+               pmap_pagetable_corruption_timeout = TRUE;
+	} else {
+		pmap_pagetable_corruption_last_abstime = mach_absolute_time();
+	}
+       pmap_pagetable_corruption_log(incident, suppress_reason, action, pmap, vaddr, &cpte, *ppnp, pvpmap, pvva);
+       return action;
+}
+/*
+ * Remove pv list entry.
+ * Called with pv_head_table entry locked.
+ * Returns pv entry to be freed (or NULL).
+ */
+
+static inline __attribute__((always_inline)) pv_hashed_entry_t
+pmap_pv_remove( pmap_t         pmap,
+               vm_map_offset_t vaddr,
+               ppnum_t         *ppnp,
+               pt_entry_t      *pte) 
+{
+       pv_hashed_entry_t       pvh_e;
+       pv_rooted_entry_t       pv_h;
+       pv_hashed_entry_t       *pprevh;
+       int                     pvhash_idx;
+       uint32_t                pv_cnt;
+       ppnum_t                 ppn;
+
+pmap_pv_remove_retry:
+       ppn = *ppnp;
+       pvh_e = PV_HASHED_ENTRY_NULL;
+       pv_h = pai_to_pvh(ppn_to_pai(ppn));
+
+       if (pv_h->pmap == PMAP_NULL) {
+               pmap_pagetable_corruption_action_t pac = pmap_classify_pagetable_corruption(pmap, vaddr, ppnp, pte, ROOT_ABSENT);
+               if (pac == PMAP_ACTION_IGNORE)
+                       goto pmap_pv_remove_exit;
+               else if (pac == PMAP_ACTION_ASSERT)
+                       panic("pmap_pv_remove(%p,0x%llx,0x%x, 0x%llx): null pv_list!", pmap, vaddr, ppn, *pte);
+               else if (pac == PMAP_ACTION_RETRY_RELOCK) {
+                       LOCK_PVH(ppn_to_pai(*ppnp));
+                       pmap_phys_attributes[ppn_to_pai(*ppnp)] |= (PHYS_MODIFIED | PHYS_REFERENCED);
+                       goto pmap_pv_remove_retry;
+               }
+               else if (pac == PMAP_ACTION_RETRY)
+                       goto pmap_pv_remove_retry;
+       }
+
+       if (pv_h->va == vaddr && pv_h->pmap == pmap) {
+               /*
+                * Header is the pv_rooted_entry.
+		 * We can't free that. If there is a queued
+		 * entry after this one, we remove it from the
+		 * ppn queue and the hash chain, copy it into the
+		 * rooted entry, and free the hashed entry instead.
+                */
+               pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);
+               if (pv_h != (pv_rooted_entry_t) pvh_e) {
+                       /*
+                        * Entry queued to root, remove this from hash
+                        * and install as new root.
+                        */
+                       CHK_NPVHASH();
+                       pvhash_idx = pvhashidx(pvh_e->pmap, pvh_e->va);
+                       LOCK_PV_HASH(pvhash_idx);
+                       remque(&pvh_e->qlink);
+                       pprevh = pvhash(pvhash_idx);
+                       if (PV_HASHED_ENTRY_NULL == *pprevh) {
+                               panic("pmap_pv_remove(%p,0x%llx,0x%x): "
+                                     "empty hash, removing rooted",
+                                     pmap, vaddr, ppn);
+                       }
+                       pmap_pvh_unlink(pvh_e);
+                       UNLOCK_PV_HASH(pvhash_idx);
+                       pv_h->pmap = pvh_e->pmap;
+                       pv_h->va = pvh_e->va;   /* dispose of pvh_e */
+               } else {
+                       /* none queued after rooted */
+                       pv_h->pmap = PMAP_NULL;
+                       pvh_e = PV_HASHED_ENTRY_NULL;
+               }
+       } else {
+               /*
+                * not removing rooted pv. find it on hash chain, remove from
+                * ppn queue and hash chain and free it
+                */
+               CHK_NPVHASH();
+               pvhash_idx = pvhashidx(pmap, vaddr);
+               LOCK_PV_HASH(pvhash_idx);
+               pprevh = pvhash(pvhash_idx);
+               if (PV_HASHED_ENTRY_NULL == *pprevh) {
+                       panic("pmap_pv_remove(%p,0x%llx,0x%x): empty hash", pmap, vaddr, ppn);
+               }
+               pvh_e = *pprevh;
+               pmap_pv_hashlist_walks++;
+               pv_cnt = 0;
+               while (PV_HASHED_ENTRY_NULL != pvh_e) {
+                       pv_cnt++;
+                       if (pvh_e->pmap == pmap &&
+                           pvh_e->va == vaddr &&
+                           pvh_e->ppn == ppn)
+                               break;
+                       pprevh = &pvh_e->nexth;
+                       pvh_e = pvh_e->nexth;
+               }
+               if (PV_HASHED_ENTRY_NULL == pvh_e) {
+                       pmap_pagetable_corruption_action_t pac = pmap_classify_pagetable_corruption(pmap, vaddr, ppnp, pte, ROOT_PRESENT);
+
+                       if (pac == PMAP_ACTION_ASSERT)
+                               panic("pmap_pv_remove(%p,0x%llx,0x%x, 0x%llx): pv not on hash, head: %p, 0x%llx", pmap, vaddr, ppn, *pte, pv_h->pmap, pv_h->va);
+                       else {
+                               UNLOCK_PV_HASH(pvhash_idx);
+                               if (pac == PMAP_ACTION_RETRY_RELOCK) {
+                                       LOCK_PVH(ppn_to_pai(*ppnp));
+                                       pmap_phys_attributes[ppn_to_pai(*ppnp)] |= (PHYS_MODIFIED | PHYS_REFERENCED);
+                                       goto pmap_pv_remove_retry;
+                               }
+                               else if (pac == PMAP_ACTION_RETRY) {
+                                       goto pmap_pv_remove_retry;
+                               }
+                               else if (pac == PMAP_ACTION_IGNORE) {
+                                       goto pmap_pv_remove_exit;
+                               }
+                       }
+               }
+               pmap_pv_hashlist_cnts += pv_cnt;
+               if (pmap_pv_hashlist_max < pv_cnt)
+                       pmap_pv_hashlist_max = pv_cnt;
+               *pprevh = pvh_e->nexth;
+               remque(&pvh_e->qlink);
+               UNLOCK_PV_HASH(pvhash_idx);
+       }
+pmap_pv_remove_exit:
+       return pvh_e;
+}
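
The returned entry (if any) is the caller's to dispose of once it has dropped its locks; the caller-side pattern, as pmap_enter() and pmap_remove_range() use it later in this diff:

    pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, &old_pte);
    /* ... finish PTE bookkeeping, drop the pv lock ... */
    if (pvh_e != PV_HASHED_ENTRY_NULL)
            PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1);  /* head == tail, count of 1 */
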
+
 #endif /* MACH_KERNEL_PRIVATE */
index de9b75835b4a259b92ee871f94a29bb71b526660..53c1996e1eb227a6ec5e11305eb93cfd5c92bdbc 100644 (file)
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <i386/pmap_internal.h>
+
+
+void           pmap_remove_range(
+                       pmap_t          pmap,
+                       vm_map_offset_t va,
+                       pt_entry_t      *spte,
+                       pt_entry_t      *epte);
+
+pv_rooted_entry_t      pv_head_table;          /* array of entries, one per
+                                                * page */
+thread_call_t          mapping_adjust_call;
+static thread_call_data_t mapping_adjust_call_data;
+uint32_t               mappingrecurse = 0;
+
+pmap_pagetable_corruption_record_t pmap_pagetable_corruption_records[PMAP_PAGETABLE_CORRUPTION_MAX_LOG];
+uint32_t pmap_pagetable_corruption_incidents;
+uint64_t pmap_pagetable_corruption_last_abstime = (~(0ULL) >> 1);
+uint64_t pmap_pagetable_corruption_interval_abstime;
+thread_call_t  pmap_pagetable_corruption_log_call;
+static thread_call_data_t      pmap_pagetable_corruption_log_call_data;
+boolean_t pmap_pagetable_corruption_timeout = FALSE;
+
 /*
  * The Intel platform can nest at the PDE level, so NBPDE (i.e. 2MB) at a time,
  * on a NBPDE boundary.
@@ -315,3 +337,942 @@ pfp_exit:
         return ppn;
 }
 
+/*
+ *     Insert the given physical page (p) at
+ *     the specified virtual address (v) in the
+ *     target physical map with the protection requested.
+ *
+ *     If specified, the page will be wired down, meaning
+ *     that the related pte cannot be reclaimed.
+ *
+ *     NB:  This is the only routine which MAY NOT lazy-evaluate
+ *     or lose information.  That is, this routine must actually
+ *     insert this page into the given map NOW.
+ */
+void
+pmap_enter(
+       register pmap_t         pmap,
+       vm_map_offset_t         vaddr,
+       ppnum_t                 pn,
+       vm_prot_t               prot,
+       unsigned int            flags,
+       boolean_t               wired)
+{
+       pt_entry_t              *pte;
+       pv_rooted_entry_t       pv_h;
+       int                     pai;
+       pv_hashed_entry_t       pvh_e;
+       pv_hashed_entry_t       pvh_new;
+       pt_entry_t              template;
+       pmap_paddr_t            old_pa;
+       pmap_paddr_t            pa = (pmap_paddr_t) i386_ptob(pn);
+       boolean_t               need_tlbflush = FALSE;
+       boolean_t               set_NX;
+       char                    oattr;
+       boolean_t               old_pa_locked;
+       /* 2MiB mappings are confined to x86_64 by VM */
+       boolean_t               superpage = flags & VM_MEM_SUPERPAGE;
+       vm_object_t             delpage_pm_obj = NULL;
+       int                     delpage_pde_index = 0;
+       pt_entry_t              old_pte;
+
+       pmap_intr_assert();
+       assert(pn != vm_page_fictitious_addr);
+
+       if (pmap == PMAP_NULL)
+               return;
+       if (pn == vm_page_guard_addr)
+               return;
+
+       PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
+                  pmap,
+                  (uint32_t) (vaddr >> 32), (uint32_t) vaddr,
+                  pn, prot);
+
+       if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
+               set_NX = FALSE;
+       else
+               set_NX = TRUE;
+
+       /*
+        *      Must allocate a new pvlist entry while we're unlocked;
+        *      zalloc may cause pageout (which will lock the pmap system).
+        *      If we determine we need a pvlist entry, we will unlock
+        *      and allocate one.  Then we will retry, throwing away
+        *      the allocated entry later (if we no longer need it).
+        */
+
+       pvh_new = PV_HASHED_ENTRY_NULL;
+Retry:
+       pvh_e = PV_HASHED_ENTRY_NULL;
+
+       PMAP_LOCK(pmap);
+
+       /*
+        *      Expand pmap to include this pte.  Assume that
+        *      pmap is always expanded to include enough hardware
+        *      pages to map one VM page.
+        */
+	if (superpage) {
+               while ((pte = pmap64_pde(pmap, vaddr)) == PD_ENTRY_NULL) {
+                       /* need room for another pde entry */
+                       PMAP_UNLOCK(pmap);
+                       pmap_expand_pdpt(pmap, vaddr);
+                       PMAP_LOCK(pmap);
+               }
+       } else {
+               while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) {
+                       /*
+                        * Must unlock to expand the pmap
+                        * going to grow pde level page(s)
+                        */
+                       PMAP_UNLOCK(pmap);
+                       pmap_expand(pmap, vaddr);
+                       PMAP_LOCK(pmap);
+               }
+       }
+
+       if (superpage && *pte && !(*pte & INTEL_PTE_PS)) {
+               /*
+                * There is still an empty page table mapped that
+                * was used for a previous base page mapping.
+                * Remember the PDE and the PDE index, so that we
+                * can free the page at the end of this function.
+                */
+               delpage_pde_index = (int)pdeidx(pmap, vaddr);
+               delpage_pm_obj = pmap->pm_obj;
+               *pte = 0;
+       }
+
+
+       old_pa = pte_to_pa(*pte);
+       pai = pa_index(old_pa);
+       old_pa_locked = FALSE;
+
+       /*
+        * if we have a previous managed page, lock the pv entry now. after
+        * we lock it, check to see if someone beat us to the lock and if so
+        * drop the lock
+        */
+       if ((0 != old_pa) && IS_MANAGED_PAGE(pai)) {
+               LOCK_PVH(pai);
+               old_pa_locked = TRUE;
+               old_pa = pte_to_pa(*pte);
+               if (0 == old_pa) {
+                       UNLOCK_PVH(pai);        /* another path beat us to it */
+                       old_pa_locked = FALSE;
+               }
+       }
+
+       /*
+        *      Special case if the incoming physical page is already mapped
+        *      at this address.
+        */
+       if (old_pa == pa) {
+
+               /*
+                *      May be changing its wired attribute or protection
+                */
+
+               template = pa_to_pte(pa) | INTEL_PTE_VALID;
+
+               if (VM_MEM_NOT_CACHEABLE ==
+                   (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT))) {
+                       if (!(flags & VM_MEM_GUARDED))
+                               template |= INTEL_PTE_PTA;
+                       template |= INTEL_PTE_NCACHE;
+               }
+               if (pmap != kernel_pmap)
+                       template |= INTEL_PTE_USER;
+               if (prot & VM_PROT_WRITE)
+                       template |= INTEL_PTE_WRITE;
+
+               if (set_NX)
+                       template |= INTEL_PTE_NX;
+
+               if (wired) {
+                       template |= INTEL_PTE_WIRED;
+                       if (!iswired(*pte))
+                               OSAddAtomic(+1,
+                                       &pmap->stats.wired_count);
+               } else {
+                       if (iswired(*pte)) {
+                               assert(pmap->stats.wired_count >= 1);
+                               OSAddAtomic(-1,
+                                       &pmap->stats.wired_count);
+                       }
+               }
+		if (superpage)		/* this path cannot be used */
+                       template |= INTEL_PTE_PS;       /* to change the page size! */
+
+               /* store modified PTE and preserve RC bits */
+               pmap_update_pte(pte, *pte,
+                       template | (*pte & (INTEL_PTE_REF | INTEL_PTE_MOD)));
+               if (old_pa_locked) {
+                       UNLOCK_PVH(pai);
+                       old_pa_locked = FALSE;
+               }
+               need_tlbflush = TRUE;
+               goto Done;
+       }
+
+       /*
+        *      Outline of code from here:
+        *         1) If va was mapped, update TLBs, remove the mapping
+        *            and remove old pvlist entry.
+        *         2) Add pvlist entry for new mapping
+        *         3) Enter new mapping.
+        *
+        *      If the old physical page is not managed step 1) is skipped
+        *      (except for updating the TLBs), and the mapping is
+        *      overwritten at step 3).  If the new physical page is not
+        *      managed, step 2) is skipped.
+        */
+
+       if (old_pa != (pmap_paddr_t) 0) {
+
+               /*
+                *      Don't do anything to pages outside valid memory here.
+                *      Instead convince the code that enters a new mapping
+                *      to overwrite the old one.
+                */
+
+               /* invalidate the PTE */
+               pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_VALID));
+               /* propagate invalidate everywhere */
+               PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
+               /* remember reference and change */
+               old_pte = *pte;
+               oattr = (char) (old_pte & (PHYS_MODIFIED | PHYS_REFERENCED));
+               /* completely invalidate the PTE */
+               pmap_store_pte(pte, 0);
+
+               if (IS_MANAGED_PAGE(pai)) {
+#if TESTING
+                       if (pmap->stats.resident_count < 1)
+                               panic("pmap_enter: resident_count");
+#endif
+                       assert(pmap->stats.resident_count >= 1);
+                       OSAddAtomic(-1,
+                               &pmap->stats.resident_count);
+
+                       if (iswired(*pte)) {
+#if TESTING
+                               if (pmap->stats.wired_count < 1)
+                                       panic("pmap_enter: wired_count");
+#endif
+                               assert(pmap->stats.wired_count >= 1);
+                               OSAddAtomic(-1,
+                                       &pmap->stats.wired_count);
+                       }
+                       pmap_phys_attributes[pai] |= oattr;
+
+                       /*
+                        *      Remove the mapping from the pvlist for
+                        *      this physical page.
+                        *      We'll end up with either a rooted pv or a
+                        *      hashed pv
+                        */
+                       pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, &old_pte);
+
+               } else {
+
+                       /*
+                        *      old_pa is not managed.
+                        *      Do removal part of accounting.
+                        */
+
+                       if (iswired(*pte)) {
+                               assert(pmap->stats.wired_count >= 1);
+                               OSAddAtomic(-1,
+                                       &pmap->stats.wired_count);
+                       }
+               }
+       }
+
+       /*
+        * if we had a previously managed paged locked, unlock it now
+        */
+       if (old_pa_locked) {
+               UNLOCK_PVH(pai);
+               old_pa_locked = FALSE;
+       }
+
+       pai = pa_index(pa);     /* now working with new incoming phys page */
+       if (IS_MANAGED_PAGE(pai)) {
+
+               /*
+                *      Step 2) Enter the mapping in the PV list for this
+                *      physical page.
+                */
+               pv_h = pai_to_pvh(pai);
+
+               LOCK_PVH(pai);
+
+               if (pv_h->pmap == PMAP_NULL) {
+                       /*
+                        *      No mappings yet, use rooted pv
+                        */
+                       pv_h->va = vaddr;
+                       pv_h->pmap = pmap;
+                       queue_init(&pv_h->qlink);
+               } else {
+                       /*
+                        *      Add new pv_hashed_entry after header.
+                        */
+                       if ((PV_HASHED_ENTRY_NULL == pvh_e) && pvh_new) {
+                               pvh_e = pvh_new;
+                               pvh_new = PV_HASHED_ENTRY_NULL;
+                       } else if (PV_HASHED_ENTRY_NULL == pvh_e) {
+                               PV_HASHED_ALLOC(pvh_e);
+                               if (PV_HASHED_ENTRY_NULL == pvh_e) {
+                                       /*
+                                        * The pv free list is empty. If we
+                                        * are on the kernel pmap we'll use
+                                        * one of the special private kernel
+                                        * pv_e's; else, we need to unlock
+                                        * everything, zalloc a pv_e, and
+                                        * restart, bringing in the pv_e with
+                                        * us.
+                                        */
+                                       if (kernel_pmap == pmap) {
+                                               PV_HASHED_KERN_ALLOC(pvh_e);
+                                       } else {
+                                               UNLOCK_PVH(pai);
+                                               PMAP_UNLOCK(pmap);
+                                               pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
+                                               goto Retry;
+                                       }
+                               }
+                       }
+                       
+                       if (PV_HASHED_ENTRY_NULL == pvh_e)
+                               panic("Mapping alias chain exhaustion, possibly induced by numerous kernel virtual double mappings");
+
+                       pvh_e->va = vaddr;
+                       pvh_e->pmap = pmap;
+                       pvh_e->ppn = pn;
+                       pv_hash_add(pvh_e, pv_h);
+
+                       /*
+                        *      Remember that we used the pvlist entry.
+                        */
+                       pvh_e = PV_HASHED_ENTRY_NULL;
+               }
+
+               /*
+                * only count the mapping
+                * for 'managed memory'
+                */
+		OSAddAtomic(+1, &pmap->stats.resident_count);
+               if (pmap->stats.resident_count > pmap->stats.resident_max) {
+                       pmap->stats.resident_max = pmap->stats.resident_count;
+               }
+       }
+       /*
+        * Step 3) Enter the mapping.
+        *
+        *      Build a template to speed up entering -
+        *      only the pfn changes.
+        */
+       template = pa_to_pte(pa) | INTEL_PTE_VALID;
+
+       if (flags & VM_MEM_NOT_CACHEABLE) {
+               if (!(flags & VM_MEM_GUARDED))
+                       template |= INTEL_PTE_PTA;
+               template |= INTEL_PTE_NCACHE;
+       }
+       if (pmap != kernel_pmap)
+               template |= INTEL_PTE_USER;
+       if (prot & VM_PROT_WRITE)
+               template |= INTEL_PTE_WRITE;
+       if (set_NX)
+               template |= INTEL_PTE_NX;
+       if (wired) {
+               template |= INTEL_PTE_WIRED;
+		OSAddAtomic(+1, &pmap->stats.wired_count);
+       }
+       if (superpage)
+               template |= INTEL_PTE_PS;
+       pmap_store_pte(pte, template);
+
+       /*
+        * if this was a managed page we delayed unlocking the pv until here
+        * to prevent pmap_page_protect et al from finding it until the pte
+        * has been stored
+        */
+       if (IS_MANAGED_PAGE(pai)) {
+               UNLOCK_PVH(pai);
+       }
+Done:
+       if (need_tlbflush == TRUE)
+               PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
+
+       if (pvh_e != PV_HASHED_ENTRY_NULL) {
+               PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1);
+       }
+       if (pvh_new != PV_HASHED_ENTRY_NULL) {
+               PV_HASHED_KERN_FREE_LIST(pvh_new, pvh_new, 1);
+       }
+       PMAP_UNLOCK(pmap);
+
+       if (delpage_pm_obj) {
+               vm_page_t m;
+
+               vm_object_lock(delpage_pm_obj);
+               m = vm_page_lookup(delpage_pm_obj, delpage_pde_index);
+               if (m == VM_PAGE_NULL)
+                   panic("pmap_enter: pte page not in object");
+               VM_PAGE_FREE(m);
+               OSAddAtomic(-1,  &inuse_ptepages_count);
+               vm_object_unlock(delpage_pm_obj);
+       }
+
+       PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, 0, 0, 0, 0, 0);
+}
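
For orientation, an illustrative call from the VM layer's point of view (the values are made up; only the signature comes from this diff): wiring one writable, default-cacheability page into the kernel pmap:

    pmap_enter(kernel_pmap, vaddr, pn,
               VM_PROT_READ | VM_PROT_WRITE,   /* prot                        */
               0,                              /* flags: no VM_MEM_* options  */
               TRUE);                          /* wired: PTE not reclaimable  */
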
+
+/*
+ *     Remove a range of hardware page-table entries.
+ *     The entries given are the first (inclusive)
+ *     and last (exclusive) entries for the VM pages.
+ *     The virtual address is the va for the first pte.
+ *
+ *     The pmap must be locked.
+ *     If the pmap is not the kernel pmap, the range must lie
+ *     entirely within one pte-page.  This is NOT checked.
+ *     Assumes that the pte-page exists.
+ */
+
+void
+pmap_remove_range(
+       pmap_t                  pmap,
+       vm_map_offset_t         start_vaddr,
+       pt_entry_t              *spte,
+       pt_entry_t              *epte)
+{
+       pt_entry_t              *cpte;
+       pv_hashed_entry_t       pvh_et = PV_HASHED_ENTRY_NULL;
+       pv_hashed_entry_t       pvh_eh = PV_HASHED_ENTRY_NULL;
+       pv_hashed_entry_t       pvh_e;
+       int                     pvh_cnt = 0;
+       int                     num_removed, num_unwired, num_found, num_invalid;
+       int                     pai;
+       pmap_paddr_t            pa;
+       vm_map_offset_t         vaddr;
+
+       num_removed = 0;
+       num_unwired = 0;
+       num_found   = 0;
+       num_invalid = 0;
+#if    defined(__i386__)
+       if (pmap != kernel_pmap &&
+           pmap->pm_task_map == TASK_MAP_32BIT &&
+           start_vaddr >= HIGH_MEM_BASE) {
+               /*
+                * The range is in the "high_shared_pde" which is shared
+                * between the kernel and all 32-bit tasks.  It holds
+                * the 32-bit commpage but also the trampolines, GDT, etc...
+                * so we can't let user tasks remove anything from it.
+                */
+               return;
+       }
+#endif
+       /* invalidate the PTEs first to "freeze" them */
+       for (cpte = spte, vaddr = start_vaddr;
+            cpte < epte;
+            cpte++, vaddr += PAGE_SIZE_64) {
+               pt_entry_t p = *cpte;
+
+               pa = pte_to_pa(p);
+               if (pa == 0)
+                       continue;
+               num_found++;
+
+               if (iswired(p))
+                       num_unwired++;
+               
+               pai = pa_index(pa);
+
+               if (!IS_MANAGED_PAGE(pai)) {
+                       /*
+                        *      Outside range of managed physical memory.
+                        *      Just remove the mappings.
+                        */
+                       pmap_store_pte(cpte, 0);
+                       continue;
+               }
+
+               if ((p & INTEL_PTE_VALID) == 0)
+                       num_invalid++;
+
+               /* invalidate the PTE */ 
+               pmap_update_pte(cpte, *cpte, (*cpte & ~INTEL_PTE_VALID));
+       }
+
+       if (num_found == 0) {
+               /* nothing was changed: we're done */
+               goto update_counts;
+       }
+
+       /* propagate the invalidates to other CPUs */
+
+       PMAP_UPDATE_TLBS(pmap, start_vaddr, vaddr);
+
+       for (cpte = spte, vaddr = start_vaddr;
+            cpte < epte;
+            cpte++, vaddr += PAGE_SIZE_64) {
+
+               pa = pte_to_pa(*cpte);
+               if (pa == 0)
+                       continue;
+
+               pai = pa_index(pa);
+
+               LOCK_PVH(pai);
+
+               pa = pte_to_pa(*cpte);
+               if (pa == 0) {
+                       UNLOCK_PVH(pai);
+                       continue;
+               }
+               num_removed++;
+
+               /*
+                * Get the modify and reference bits, then
+                * nuke the entry in the page table
+                */
+               /* remember reference and change */
+               pmap_phys_attributes[pai] |=
+                       (char) (*cpte & (PHYS_MODIFIED | PHYS_REFERENCED));
+
+               /*
+                * Remove the mapping from the pvlist for this physical page.
+                */
+               pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, cpte);
+
+               /* completely invalidate the PTE */
+               pmap_store_pte(cpte, 0);
+
+               UNLOCK_PVH(pai);
+
+               if (pvh_e != PV_HASHED_ENTRY_NULL) {
+                       pvh_e->qlink.next = (queue_entry_t) pvh_eh;
+                       pvh_eh = pvh_e;
+
+                       if (pvh_et == PV_HASHED_ENTRY_NULL) {
+                               pvh_et = pvh_e;
+                       }
+                       pvh_cnt++;
+               }
+       } /* for loop */
+
+       if (pvh_eh != PV_HASHED_ENTRY_NULL) {
+               PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
+       }
+update_counts:
+       /*
+        *      Update the counts
+        */
+#if TESTING
+       if (pmap->stats.resident_count < num_removed)
+               panic("pmap_remove_range: resident_count");
+#endif
+       assert(pmap->stats.resident_count >= num_removed);
+       OSAddAtomic(-num_removed,  &pmap->stats.resident_count);
+
+#if TESTING
+       if (pmap->stats.wired_count < num_unwired)
+               panic("pmap_remove_range: wired_count");
+#endif
+       assert(pmap->stats.wired_count >= num_unwired);
+       OSAddAtomic(-num_unwired,  &pmap->stats.wired_count);
+
+       return;
+}
+
+
+/*
+ *     Remove the given range of addresses
+ *     from the specified map.
+ *
+ *     It is assumed that the start and end are properly
+ *     rounded to the hardware page size.
+ */
+void
+pmap_remove(
+       pmap_t          map,
+       addr64_t        s64,
+       addr64_t        e64)
+{
+       pt_entry_t     *pde;
+       pt_entry_t     *spte, *epte;
+       addr64_t        l64;
+       uint64_t        deadline;
+
+       pmap_intr_assert();
+
+       if (map == PMAP_NULL || s64 == e64)
+               return;
+
+       PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
+                  map,
+                  (uint32_t) (s64 >> 32), s64,
+                  (uint32_t) (e64 >> 32), e64);
+
+
+       PMAP_LOCK(map);
+
+#if 0
+       /*
+        * Check that address range in the kernel does not overlap the stacks.
+        * We initialize local static min/max variables once to avoid making
+        * 2 function calls for every remove. Note also that these functions
+        * both return 0 before kernel stacks have been initialized, and hence
+        * the panic is not triggered in this case.
+        */
+       if (map == kernel_pmap) {
+               static vm_offset_t kernel_stack_min = 0;
+               static vm_offset_t kernel_stack_max = 0;
+
+               if (kernel_stack_min == 0) {
+                       kernel_stack_min = min_valid_stack_address();
+                       kernel_stack_max = max_valid_stack_address();
+               }
+               if ((kernel_stack_min <= s64 && s64 < kernel_stack_max) ||
+                   (kernel_stack_min < e64 && e64 <= kernel_stack_max))
+                       panic("pmap_remove() attempted in kernel stack");
+       }
+#else
+
+       /*
+        * The values of kernel_stack_min and kernel_stack_max are no longer
+        * relevant now that we allocate kernel stacks in the kernel map,
+        * so the old code above no longer applies.  If we wanted to check that
+        * we weren't removing a mapping of a page in a kernel stack we'd 
+        * mark the PTE with an unused bit and check that here.
+        */
+
+#endif
+
+       deadline = rdtsc64() + max_preemption_latency_tsc;
+
+       while (s64 < e64) {
+               l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
+               if (l64 > e64)
+                       l64 = e64;
+               pde = pmap_pde(map, s64);
+
+               if (pde && (*pde & INTEL_PTE_VALID)) {
+                       if (*pde & INTEL_PTE_PS) {
+                               /*
+                                * If we're removing a superpage, pmap_remove_range()
+                                * must work on level 2 instead of level 1; and we're
+                                * only passing a single level 2 entry instead of a
+                                * level 1 range.
+                                */
+                               spte = pde;
+                               epte = spte+1; /* excluded */
+                       } else {
+                               spte = pmap_pte(map, (s64 & ~(pde_mapped_size - 1)));
+                               spte = &spte[ptenum(s64)];
+                               epte = &spte[intel_btop(l64 - s64)];
+                       }
+                       pmap_remove_range(map, s64, spte, epte);
+               }
+               s64 = l64;
+
+               if (s64 < e64 && rdtsc64() >= deadline) {
+                       PMAP_UNLOCK(map)
+                       PMAP_LOCK(map)
+                       deadline = rdtsc64() + max_preemption_latency_tsc;
+               }
+       }
+
+       PMAP_UNLOCK(map);
+
+       PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END,
+                  map, 0, 0, 0, 0);
+
+}
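
pmap_remove() also shows the preemption-latency pattern: it budgets lock hold time with a TSC deadline and, between PDE-sized chunks, drops and immediately retakes the pmap lock so a waiting thread gets a preemption window before the next chunk. A sketch of the same loop shape under stated assumptions (lock_t, read_cycles() and the other helpers are invented stand-ins, not kernel interfaces):

        typedef struct { int held; } lock_t;
        extern void lock_acquire(lock_t *lk);
        extern void lock_release(lock_t *lk);
        extern unsigned long long read_cycles(void);
        extern void process_chunk(unsigned long start, unsigned long end);

        #define MAX_HELD_CYCLES 1000000ULL      /* tunable latency budget */

        static void
        remove_with_lock_breaks(lock_t *lk, unsigned long s, unsigned long e,
            unsigned long chunk)                /* chunk: a power of two */
        {
                unsigned long long deadline;
                unsigned long l;

                lock_acquire(lk);
                deadline = read_cycles() + MAX_HELD_CYCLES;

                while (s < e) {
                        /* Clip this pass at the next chunk boundary. */
                        l = (s + chunk) & ~(chunk - 1);
                        if (l > e)
                                l = e;
                        process_chunk(s, l);
                        s = l;

                        /* Budget exceeded with work left: briefly drop the
                         * lock to open a preemption window, then restart
                         * the budget. */
                        if (s < e && read_cycles() >= deadline) {
                                lock_release(lk);
                                lock_acquire(lk);
                                deadline = read_cycles() + MAX_HELD_CYCLES;
                        }
                }
                lock_release(lk);
        }
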
+
+/*
+ *     Routine:        pmap_page_protect
+ *
+ *     Function:
+ *             Lower the permission for all mappings to a given
+ *             page.
+ */
+void
+pmap_page_protect(
+        ppnum_t         pn,
+       vm_prot_t       prot)
+{
+       pv_hashed_entry_t       pvh_eh = PV_HASHED_ENTRY_NULL;
+       pv_hashed_entry_t       pvh_et = PV_HASHED_ENTRY_NULL;
+       pv_hashed_entry_t       nexth;
+       int                     pvh_cnt = 0;
+       pv_rooted_entry_t       pv_h;
+       pv_rooted_entry_t       pv_e;
+       pv_hashed_entry_t       pvh_e;
+       pt_entry_t              *pte;
+       int                     pai;
+       pmap_t                  pmap;
+       boolean_t               remove;
+
+       pmap_intr_assert();
+       assert(pn != vm_page_fictitious_addr);
+       if (pn == vm_page_guard_addr)
+               return;
+
+       pai = ppn_to_pai(pn);
+
+       if (!IS_MANAGED_PAGE(pai)) {
+               /*
+                *      Not a managed page.
+                */
+               return;
+       }
+       PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START,
+                  pn, prot, 0, 0, 0);
+
+       /*
+        * Determine the new protection.
+        */
+       switch (prot) {
+       case VM_PROT_READ:
+       case VM_PROT_READ | VM_PROT_EXECUTE:
+               remove = FALSE;
+               break;
+       case VM_PROT_ALL:
+               return;         /* nothing to do */
+       default:
+               remove = TRUE;
+               break;
+       }
+
+       pv_h = pai_to_pvh(pai);
+
+       LOCK_PVH(pai);
+
+
+       /*
+        * Walk down PV list, if any, changing or removing all mappings.
+        */
+       if (pv_h->pmap == PMAP_NULL)
+               goto done;
+
+       pv_e = pv_h;
+       pvh_e = (pv_hashed_entry_t) pv_e;       /* cheat */
+
+       do {
+               vm_map_offset_t vaddr;
+
+               pmap = pv_e->pmap;
+               vaddr = pv_e->va;
+               pte = pmap_pte(pmap, vaddr);
+
+               if (0 == pte) {
+                       panic("pmap_page_protect() "
+                               "pmap=%p pn=0x%x vaddr=0x%llx\n",
+                               pmap, pn, vaddr);
+               }
+#if    DEBUG
+               /* Safe to dereference: the NULL case panicked above. */
+               if (pa_index(pte_to_pa(*pte)) != pn)
+                       panic("pmap_page_protect: PTE mismatch, pn: 0x%x, pmap: %p, vaddr: 0x%llx, pte: 0x%llx", pn, pmap, vaddr, *pte);
+#endif
+               nexth = (pv_hashed_entry_t) queue_next(&pvh_e->qlink);
+
+               /*
+                * Remove the mapping if new protection is NONE
+                * or if write-protecting a kernel mapping.
+                */
+               if (remove || pmap == kernel_pmap) {
+                       /*
+                        * Remove the mapping, collecting dirty bits.
+                        */
+                       pmap_update_pte(pte, *pte, *pte & ~INTEL_PTE_VALID);
+                       PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
+                       pmap_phys_attributes[pai] |=
+                               *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
+                       pmap_store_pte(pte, 0);
+
+#if TESTING
+                       if (pmap->stats.resident_count < 1)
+                               panic("pmap_page_protect: resident_count");
+#endif
+                       assert(pmap->stats.resident_count >= 1);
+                       OSAddAtomic(-1,  &pmap->stats.resident_count);
+
+                       /*
+                        * Deal with the pv_rooted_entry.
+                        */
+
+                       if (pv_e == pv_h) {
+                               /*
+                                * Fix up head later.
+                                */
+                               pv_h->pmap = PMAP_NULL;
+                       } else {
+                               /*
+                                * Delete this entry.
+                                */
+                               pv_hash_remove(pvh_e);
+                               pvh_e->qlink.next = (queue_entry_t) pvh_eh;
+                               pvh_eh = pvh_e;
+
+                               if (pvh_et == PV_HASHED_ENTRY_NULL)
+                                       pvh_et = pvh_e;
+                               pvh_cnt++;
+                       }
+               } else {
+                       /*
+                        * Write-protect.
+                        */
+                       pmap_update_pte(pte, *pte, *pte & ~INTEL_PTE_WRITE);
+                       PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
+               }
+               pvh_e = nexth;
+       } while ((pv_e = (pv_rooted_entry_t) nexth) != pv_h);
+
+
+       /*
+        * If pv_head mapping was removed, fix it up.
+        */
+       if (pv_h->pmap == PMAP_NULL) {
+               pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);
+
+               if (pvh_e != (pv_hashed_entry_t) pv_h) {
+                       pv_hash_remove(pvh_e);
+                       pv_h->pmap = pvh_e->pmap;
+                       pv_h->va = pvh_e->va;
+                       pvh_e->qlink.next = (queue_entry_t) pvh_eh;
+                       pvh_eh = pvh_e;
+
+                       if (pvh_et == PV_HASHED_ENTRY_NULL)
+                               pvh_et = pvh_e;
+                       pvh_cnt++;
+               }
+       }
+       if (pvh_eh != PV_HASHED_ENTRY_NULL) {
+               PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
+       }
+done:
+       UNLOCK_PVH(pai);
+
+       PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END,
+                  0, 0, 0, 0, 0);
+}
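
Note how pmap_page_protect() repairs the list head at the end: the first mapping of a page lives in the embedded pv_rooted_entry_t slot itself, with further mappings chained as hashed heap entries, so removing the root's mapping means promoting the next chained entry into the embedded slot rather than unlinking the root. A toy model of that promotion (names and shapes are illustrative, not the kernel's):

        #include <stddef.h>

        /* Toy model: the first mapping lives in the embedded per-page root
         * slot; overflow mappings are heap entries on a circular list
         * through it. */
        struct pv_toy {
                struct pv_toy *next;    /* circular list, root included */
                void          *pmap;    /* NULL in the root: page unmapped */
                unsigned long  va;
        };

        /* Remove the root's own mapping: promote the first overflow entry
         * into the embedded slot so the root remains the list head.
         * Returns the now-redundant heap entry for the caller to free,
         * or NULL if there were no overflow entries. */
        static struct pv_toy *
        pv_root_remove(struct pv_toy *root)
        {
                struct pv_toy *e = root->next;

                if (e == root) {                /* no overflow entries left */
                        root->pmap = NULL;
                        return NULL;
                }
                root->pmap = e->pmap;           /* copy entry into the root */
                root->va   = e->va;
                root->next = e->next;           /* unlink the copied entry */
                return e;
        }
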
+
+__private_extern__ void
+pmap_pagetable_corruption_msg_log(int (*log_func)(const char * fmt, ...)__printflike(1,2)) {
+       if (pmap_pagetable_corruption_incidents > 0) {
+               int i, e = MIN(pmap_pagetable_corruption_incidents, PMAP_PAGETABLE_CORRUPTION_MAX_LOG);
+               (*log_func)("%u pagetable corruption incident(s) detected, timeout: %u\n", pmap_pagetable_corruption_incidents, pmap_pagetable_corruption_timeout);
+               for (i = 0; i < e; i++) {
+                       (*log_func)("Incident 0x%x, reason: 0x%x, action: 0x%x, time: 0x%llx\n", pmap_pagetable_corruption_records[i].incident,  pmap_pagetable_corruption_records[i].reason, pmap_pagetable_corruption_records[i].action, pmap_pagetable_corruption_records[i].abstime);
+               }
+       }
+}
+
+void
+mapping_free_prime(void)
+{
+       int                     i;
+       pv_hashed_entry_t       pvh_e;
+       pv_hashed_entry_t       pvh_eh;
+       pv_hashed_entry_t       pvh_et;
+       int                     pv_cnt;
+
+       pv_cnt = 0;
+       pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
+       for (i = 0; i < (5 * PV_HASHED_ALLOC_CHUNK); i++) {
+               pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
+
+               pvh_e->qlink.next = (queue_entry_t)pvh_eh;
+               pvh_eh = pvh_e;
+
+               if (pvh_et == PV_HASHED_ENTRY_NULL)
+                       pvh_et = pvh_e;
+               pv_cnt++;
+       }
+       PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
+
+       pv_cnt = 0;
+       pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
+       for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK; i++) {
+               pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
+
+               pvh_e->qlink.next = (queue_entry_t)pvh_eh;
+               pvh_eh = pvh_e;
+
+               if (pvh_et == PV_HASHED_ENTRY_NULL)
+                       pvh_et = pvh_e;
+               pv_cnt++;
+       }
+       PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
+
+}
+
+static inline void
+pmap_pagetable_corruption_log_setup(void) {
+       if (pmap_pagetable_corruption_log_call == NULL) {
+               nanotime_to_absolutetime(PMAP_PAGETABLE_CORRUPTION_INTERVAL, 0, &pmap_pagetable_corruption_interval_abstime);
+               thread_call_setup(&pmap_pagetable_corruption_log_call_data,
+                   (thread_call_func_t) pmap_pagetable_corruption_msg_log,
+                   (thread_call_param_t) &printf);
+               pmap_pagetable_corruption_log_call = &pmap_pagetable_corruption_log_call_data;
+       }
+}
+
+void
+mapping_adjust(void)
+{
+       pv_hashed_entry_t       pvh_e;
+       pv_hashed_entry_t       pvh_eh;
+       pv_hashed_entry_t       pvh_et;
+       int                     pv_cnt;
+       int                     i;
+
+       if (mapping_adjust_call == NULL) {
+               thread_call_setup(&mapping_adjust_call_data,
+                                 (thread_call_func_t) mapping_adjust,
+                                 (thread_call_param_t) NULL);
+               mapping_adjust_call = &mapping_adjust_call_data;
+       }
+
+       pmap_pagetable_corruption_log_setup();
+
+       pv_cnt = 0;
+       pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
+       if (pv_hashed_kern_free_count < PV_HASHED_KERN_LOW_WATER_MARK) {
+               for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK; i++) {
+                       pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
+
+                       pvh_e->qlink.next = (queue_entry_t)pvh_eh;
+                       pvh_eh = pvh_e;
+
+                       if (pvh_et == PV_HASHED_ENTRY_NULL)
+                               pvh_et = pvh_e;
+                       pv_cnt++;
+               }
+               PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
+       }
+
+       pv_cnt = 0;
+       pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
+       if (pv_hashed_free_count < PV_HASHED_LOW_WATER_MARK) {
+               for (i = 0; i < PV_HASHED_ALLOC_CHUNK; i++) {
+                       pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
+
+                       pvh_e->qlink.next = (queue_entry_t)pvh_eh;
+                       pvh_eh = pvh_e;
+
+                       if (pvh_et == PV_HASHED_ENTRY_NULL)
+                               pvh_et = pvh_e;
+                       pv_cnt++;
+               }
+               PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
+       }
+       mappingrecurse = 0;
+}
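
mapping_free_prime() and mapping_adjust() repeat one idiom four times: allocate a chunk of entries, thread them into a local singly linked chain while tracking head, tail and count, then splice the whole chain onto a free list in a single operation. A hypothetical refactoring of that idiom (alloc_entry() and free_list_splice() stand in for zalloc(pv_hashed_list_zone) and the PV_HASHED_*FREE_LIST macros):

        struct entry {
                struct entry *next;
        };

        extern struct entry *alloc_entry(void);
        extern void free_list_splice(struct entry *head, struct entry *tail,
            int n);

        static void
        replenish(int chunk)
        {
                struct entry *head = NULL, *tail = NULL;
                int n;

                for (n = 0; n < chunk; n++) {
                        struct entry *e = alloc_entry();

                        e->next = head;         /* push onto the local chain */
                        head = e;
                        if (tail == NULL)       /* first entry is the tail */
                                tail = e;
                }
                if (n > 0)
                        free_list_splice(head, tail, n);
        }
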
+
index 9adfe5b832f90296e4b60148459a118620099a51..d61a26a7705ce22352459fe48c7fb5c3f552011f 100644 (file)
@@ -595,7 +595,7 @@ ipc_kmsg_alloc(
                mach_msg_size_t max_desc = (mach_msg_size_t)(((size - sizeof(mach_msg_base_t)) /
                                           sizeof(mach_msg_ool_descriptor32_t)) *
                                           DESC_SIZE_ADJUSTMENT);
-               if (msg_and_trailer_size >= MACH_MSG_SIZE_MAX - max_desc)
+               if (msg_and_trailer_size > MACH_MSG_SIZE_MAX - max_desc)
                        return IKM_NULL;
 
                max_expanded_size = msg_and_trailer_size + max_desc;
@@ -617,12 +617,9 @@ ipc_kmsg_alloc(
                        assert(i <= IKM_STASH);
                        kmsg = cache->entries[--i];
                        cache->avail = i;
-                       ikm_check_init(kmsg, max_expanded_size);
                        enable_preemption();
-                       kmsg->ikm_header = (mach_msg_header_t *)
-                                          ((vm_offset_t)(kmsg + 1) +
-                                           max_expanded_size -
-                                           msg_and_trailer_size);
+                       ikm_check_init(kmsg, max_expanded_size);
+                       ikm_set_header(kmsg, msg_and_trailer_size);
                        return (kmsg);
                }
                enable_preemption();
@@ -633,10 +630,7 @@ ipc_kmsg_alloc(
 
        if (kmsg != IKM_NULL) {
                ikm_init(kmsg, max_expanded_size);
-               kmsg->ikm_header = (mach_msg_header_t *)
-                                  ((vm_offset_t)(kmsg + 1) +
-                                   max_expanded_size -
-                                   msg_and_trailer_size);
+               ikm_set_header(kmsg, msg_and_trailer_size);
        }
 
        return(kmsg);
@@ -1072,6 +1066,23 @@ ipc_kmsg_clear_prealloc(
        IP_CLEAR_PREALLOC(port, kmsg);
 }
 
+/*
+ *     Routine:        ipc_kmsg_prealloc
+ *     Purpose:
+ *             Wrapper around ipc_kmsg_alloc() to account for
+ *             header expansion requirements.
+ */
+ipc_kmsg_t
+ipc_kmsg_prealloc(mach_msg_size_t size)
+{
+#if defined(__LP64__)
+       if (size > MACH_MSG_SIZE_MAX - LEGACY_HEADER_SIZE_DELTA)
+               return IKM_NULL;
+
+       size += LEGACY_HEADER_SIZE_DELTA;
+#endif
+       return ipc_kmsg_alloc(size);
+}
 
 
 /*
@@ -1243,10 +1254,9 @@ ipc_kmsg_get_from_kernel(
         * clients.  These are set up for those kernel clients
         * which cannot afford to wait.
         */
-#ifndef __LP64__
-       /* LP64todo - does the prealloc kmsg need ikm_header padding?
-        */
        if (IP_PREALLOC(dest_port)) {
+               mach_msg_size_t max_desc = 0;
+
                ip_lock(dest_port);
                if (!ip_active(dest_port)) {
                        ip_unlock(dest_port);
@@ -1254,19 +1264,26 @@ ipc_kmsg_get_from_kernel(
                }
                assert(IP_PREALLOC(dest_port));
                kmsg = dest_port->ip_premsg;
-               if (msg_and_trailer_size > kmsg->ikm_size) {
-                       ip_unlock(dest_port);
-                       return MACH_SEND_TOO_LARGE;
-               }
                if (ikm_prealloc_inuse(kmsg)) {
                        ip_unlock(dest_port);
                        return MACH_SEND_NO_BUFFER;
                }
+#if !defined(__LP64__)
+               if (msg->msgh_bits & MACH_MSGH_BITS_COMPLEX) {
+                       assert(size > sizeof(mach_msg_base_t));
+                       max_desc = ((mach_msg_base_t *)msg)->body.msgh_descriptor_count *
+                               DESC_SIZE_ADJUSTMENT;
+               }
+#endif
+               if (msg_and_trailer_size > kmsg->ikm_size - max_desc) {
+                       ip_unlock(dest_port);
+                       return MACH_SEND_TOO_LARGE;
+               }
                ikm_prealloc_set_inuse(kmsg, dest_port);
+               ikm_set_header(kmsg, msg_and_trailer_size);
                ip_unlock(dest_port);
        }
        else
-#endif /* !__LP64__ */
        {
                kmsg = ipc_kmsg_alloc(msg_and_trailer_size);
                if (kmsg == IKM_NULL)
index db4df8ad5a68e7ad043c6b7261f4e20246c4e18d..8687cafbfb115f2cc88fa0822d289f2ff4eca82c 100644 (file)
@@ -162,6 +162,12 @@ MACRO_BEGIN                                                                \
        assert((kmsg)->ikm_next == IKM_BOGUS);                          \
 MACRO_END
 
+#define ikm_set_header(kmsg, mtsize)                                   \
+MACRO_BEGIN                                                            \
+       (kmsg)->ikm_header = (mach_msg_header_t *)                      \
+       ((vm_offset_t)((kmsg) + 1) + (kmsg)->ikm_size - (mtsize));      \
+MACRO_END
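
ikm_set_header() right-aligns the message within the kmsg buffer: the payload area starts just past the bookkeeping struct at (kmsg) + 1 and is ikm_size bytes long, and the header is placed mtsize bytes back from its end, leaving all slack in front of the message. That is what lets one preallocated buffer serve messages of different sizes, and what ipc_kmsg_prealloc() above sizes for when LP64 descriptor expansion can grow a message. The same arithmetic as a standalone sketch with illustrative types:

        #include <stdint.h>

        struct kmsg_sketch {
                uint32_t size;  /* usable payload bytes after this struct */
        };

        /* Place a message of mtsize bytes flush against the end of the
         * buffer, leaving (size - mtsize) bytes of slack in front of it. */
        static void *
        set_header_sketch(struct kmsg_sketch *k, uint32_t mtsize)
        {
                return (char *)(k + 1) + k->size - mtsize;
        }
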
+
 struct ipc_kmsg_queue {
        struct ipc_kmsg *ikmq_base;
 };
@@ -267,13 +273,16 @@ extern void ipc_kmsg_destroy(
 extern void ipc_kmsg_destroy_dest(
        ipc_kmsg_t      kmsg);
 
-
 /* Preallocate a kernel message buffer */
+extern ipc_kmsg_t ipc_kmsg_prealloc(
+       mach_msg_size_t size);
+
+/* bind a preallocated message buffer to a port */
 extern void ipc_kmsg_set_prealloc(
        ipc_kmsg_t      kmsg,
        ipc_port_t      port);
 
-/* Clear a kernel message buffer */
+/* Clear preallocated message buffer binding */
 extern void ipc_kmsg_clear_prealloc(
        ipc_kmsg_t      kmsg,
        ipc_port_t      port);
index eaa7bad401df82d8aff9d1f72c31aafb5e81309e..76185c9ba74c8cde353a7c546b20efe19331e653 100644 (file)
@@ -107,6 +107,7 @@ decl_lck_mtx_data(, ipc_port_timestamp_lock_data)
 lck_mtx_ext_t  ipc_port_multiple_lock_data_ext;
 lck_mtx_ext_t  ipc_port_timestamp_lock_data_ext;
 ipc_port_timestamp_t   ipc_port_timestamp_data;
+int ipc_portbt;
 
 #if    MACH_ASSERT
 void   ipc_port_init_debug(
@@ -1235,8 +1236,14 @@ ipc_port_debug_init(void)
 {
        queue_init(&port_alloc_queue);
        lck_mtx_init_ext(&port_alloc_queue_lock, &port_alloc_queue_lock_ext, &ipc_lck_grp, &ipc_lck_attr);
+
+       if (!PE_parse_boot_argn("ipc_portbt", &ipc_portbt, sizeof (ipc_portbt)))
+               ipc_portbt = 0;
 }
 
+#ifdef MACH_BSD
+extern int proc_pid(struct proc*);
+#endif /* MACH_BSD */
 
 /*
  *     Initialize all of the debugging state in a port.
@@ -1255,12 +1262,22 @@ ipc_port_init_debug(
        for (i = 0; i < IP_NSPARES; ++i)
                port->ip_spares[i] = 0;
 
+#ifdef MACH_BSD
+       task_t task = current_task();
+       if (task != TASK_NULL) {
+               struct proc* proc = (struct proc*) get_bsdtask_info(task);
+               if (proc)
+                       port->ip_spares[0] = proc_pid(proc);
+       }
+#endif /* MACH_BSD */
+
        /*
         *      Machine-dependent routine to fill in an
         *      array with up to IP_CALLSTACK_MAX levels
         *      of return pc information.
         */
-       machine_callstack(&port->ip_callstack[0], IP_CALLSTACK_MAX);
+       if (ipc_portbt)
+               machine_callstack(&port->ip_callstack[0], IP_CALLSTACK_MAX);
 
 #if 0
        lck_mtx_lock(&port_alloc_queue_lock);
index 7249fe96a9be13e75fc05f9f020397f77e435396..4998a84bc6e860b3186daaae68767f3ad3d8de42 100644 (file)
@@ -150,8 +150,8 @@ struct ipc_port {
 #endif
 
 #if    MACH_ASSERT
-#define        IP_NSPARES              10
-#define        IP_CALLSTACK_MAX        10
+#define        IP_NSPARES              4
+#define        IP_CALLSTACK_MAX        16
        queue_chain_t   ip_port_links;  /* all allocated ports */
        thread_t        ip_thread;      /* who made me?  thread context */
        unsigned long   ip_timetrack;   /* give an idea of "when" created */
index 28d0fbc87d2fdc10c0be4d93f9bfd9a538f9e048..389e80bb1867c1313c16846197d52ddab8fa3668 100644 (file)
@@ -644,9 +644,11 @@ mach_port_allocate_full(
                        return KERN_RESOURCE_SHORTAGE;
                } else {
                        mach_msg_size_t size = qosp->len + MAX_TRAILER_SIZE;
+
                        if (right != MACH_PORT_RIGHT_RECEIVE)
                                return (KERN_INVALID_VALUE);
-                       kmsg = (ipc_kmsg_t)ipc_kmsg_alloc(size);
+
+                       kmsg = (ipc_kmsg_t)ipc_kmsg_prealloc(size);
                        if (kmsg == IKM_NULL)
                                return (KERN_RESOURCE_SHORTAGE);
                }
index dbacccfd85276f1c0aa275a0ba302b7f286ac8e5..df1d6d953d00ef9b7943e6c81f9db34b0fda4047 100644 (file)
 #include <mach/mach_types.h>
 #include <sys/appleapiopts.h>
 #include <kern/debug.h>
+#include <uuid/uuid.h>
 
 #include <kdp/kdp_internal.h>
 #include <kdp/kdp_private.h>
 #include <kdp/kdp_core.h>
+#include <kdp/kdp_dyld.h>
 
 #include <libsa/types.h>
 
@@ -115,6 +117,7 @@ int noresume_on_disconnect = 0;
 extern unsigned int return_on_panic;
 
 typedef struct thread_snapshot *thread_snapshot_t;
+typedef struct task_snapshot *task_snapshot_t;
 
 extern int
 machine_trace_thread(thread_t thread, char *tracepos, char *tracebound, int nframes, boolean_t user_p);
@@ -143,7 +146,7 @@ kdp_remove_breakpoint_internal(
 
 
 int
-kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, unsigned trace_options, uint32_t *pbytesTraced);
+kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, uint32_t trace_flags, uint32_t dispatch_offset, uint32_t *pbytesTraced);
 
 boolean_t kdp_copyin(pmap_t, uint64_t, void *, size_t);
 extern void bcopy_phys(addr64_t, addr64_t, vm_size_t);
@@ -1064,7 +1067,7 @@ kdp_copyin(pmap_t p, uint64_t uaddr, void *dest, size_t size) {
 }
 
 int
-kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, unsigned trace_options, uint32_t *pbytesTraced)
+kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, uint32_t trace_flags, uint32_t dispatch_offset, uint32_t *pbytesTraced)
 {
        char *tracepos = (char *) tracebuf;
        char *tracebound = tracepos + tracebuf_size;
@@ -1073,49 +1076,105 @@ kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, unsigned trace_op
 
        task_t task = TASK_NULL;
        thread_t thread = THREAD_NULL;
-       int nframes = trace_options;
        thread_snapshot_t tsnap = NULL;
        unsigned framesize = 2 * sizeof(vm_offset_t);
-       boolean_t dispatch_p = ((trace_options & STACKSHOT_GET_DQ) != 0);
-       uint16_t  dispatch_offset = (trace_options & STACKSHOT_DISPATCH_OFFSET_MASK) >> STACKSHOT_DISPATCH_OFFSET_SHIFT;
        struct task ctask;
        struct thread cthread;
-
-       if ((nframes <= 0) || nframes > MAX_FRAMES)
-               nframes = MAX_FRAMES;
+       
+       boolean_t dispatch_p = ((trace_flags & STACKSHOT_GET_DQ) != 0);
+       boolean_t save_loadinfo_p = ((trace_flags & STACKSHOT_SAVE_LOADINFO) != 0);
 
        queue_iterate(&tasks, task, task_t, tasks) {
+               int task_pid = pid_from_task(task);
+               boolean_t task64 = task_has_64BitAddr(task);
+
                if ((task == NULL) || (ml_nofault_copy((vm_offset_t) task, (vm_offset_t) &ctask, sizeof(struct task)) != sizeof(struct task)))
                        goto error_exit;
+
                /* Trace everything, unless a process was specified */
-               if ((pid == -1) || (pid == pid_from_task(task)))
+               if ((pid == -1) || (pid == task_pid)) {
+                       task_snapshot_t task_snap;
+                       uint32_t uuid_info_count;
+                       mach_vm_address_t uuid_info_addr;
+
+                       if (save_loadinfo_p && task_pid > 0) {
+                               // Read the dyld_all_image_infos struct from the task memory to get UUID array count and location
+                               if (task64) {
+                                       struct dyld_all_image_infos64 task_image_infos;
+                                       if (!kdp_copyin(task->map->pmap, task->all_image_info_addr, &task_image_infos, sizeof(struct dyld_all_image_infos64)))
+                                               goto error_exit;
+                                       uuid_info_count = (uint32_t)task_image_infos.uuidArrayCount;
+                                       uuid_info_addr = task_image_infos.uuidArray;
+                               } else {
+                                       struct dyld_all_image_infos task_image_infos;
+                                       if (!kdp_copyin(task->map->pmap, task->all_image_info_addr, &task_image_infos, sizeof(struct dyld_all_image_infos)))
+                                               goto error_exit;
+                                       uuid_info_count = task_image_infos.uuidArrayCount;
+                                       uuid_info_addr = task_image_infos.uuidArray;
+                               }
+                       } else {
+                               uuid_info_count = 0;
+                               uuid_info_addr = 0;
+                       }
+
+                       if (tracepos + sizeof(struct task_snapshot) > tracebound) {
+                               error = -1;
+                               goto error_exit;
+                       }
+
+                       task_snap = (task_snapshot_t) tracepos;
+                       task_snap->snapshot_magic = STACKSHOT_TASK_SNAPSHOT_MAGIC;
+                       task_snap->pid = task_pid;
+                       task_snap->nloadinfos = uuid_info_count;
+                       /* Add the BSD process identifiers */
+                       if (task_pid != -1)
+                               proc_name_kdp(task, task_snap->p_comm, sizeof(task_snap->p_comm));
+                       else
+                               task_snap->p_comm[0] = '\0';
+                       task_snap->ss_flags = 0;
+                       if (task64)
+                               task_snap->ss_flags |= kUser64_p;
+                       
+                       tracepos += sizeof(struct task_snapshot);
+
+                       if (task_pid > 0 && uuid_info_count > 0) {
+                               uint32_t uuid_info_size = (uint32_t)(task64 ? sizeof(struct dyld_uuid_info64) : sizeof(struct dyld_uuid_info));
+                               uint32_t uuid_info_array_size = uuid_info_count * uuid_info_size;
+
+                               if (tracepos + uuid_info_array_size > tracebound) {
+                                       error = -1;
+                                       goto error_exit;
+                               }
+
+                               // Copy in the UUID info array
+                               if (!kdp_copyin(task->map->pmap, uuid_info_addr, tracepos, uuid_info_array_size))
+                                       goto error_exit;
+
+                               tracepos += uuid_info_array_size;
+                       }
+
                        queue_iterate(&task->threads, thread, thread_t, task_threads){
                                if ((thread == NULL) || (ml_nofault_copy((vm_offset_t) thread, (vm_offset_t) &cthread, sizeof(struct thread)) != sizeof(struct thread)))
                                        goto error_exit;
+
                                if (((tracepos + 4 * sizeof(struct thread_snapshot)) > tracebound)) {
                                        error = -1;
                                        goto error_exit;
                                }
-/* Populate the thread snapshot header */
+                               /* Populate the thread snapshot header */
                                tsnap = (thread_snapshot_t) tracepos;
                                tsnap->thread_id = (uint64_t) (uintptr_t)thread;
                                tsnap->state = thread->state;
                                tsnap->wait_event = thread->wait_event;
                                tsnap->continuation = (uint64_t) (uintptr_t) thread->continuation;
-/* Add the BSD process identifiers */
-                               if ((tsnap->pid = pid_from_task(task)) != -1)
-                                       proc_name_kdp(task, tsnap->p_comm, sizeof(tsnap->p_comm));
-                               else
-                                       tsnap->p_comm[0] = '\0';
 
-                               tsnap->snapshot_magic = 0xfeedface;
+                               tsnap->snapshot_magic = STACKSHOT_THREAD_SNAPSHOT_MAGIC;
                                tracepos += sizeof(struct thread_snapshot);
                                tsnap->ss_flags = 0;
 
                                if (dispatch_p && (task != kernel_task) && (task->active) && (task->map)) {
                                        uint64_t dqkeyaddr = thread_dispatchqaddr(thread);
                                        if (dqkeyaddr != 0) {
-                                               boolean_t task64 = task_has_64BitAddr(task);
                                                uint64_t dqaddr = 0;
                                                if (kdp_copyin(task->map->pmap, dqkeyaddr, &dqaddr, (task64 ? 8 : 4)) && (dqaddr != 0)) {
                                                        uint64_t dqserialnumaddr = dqaddr + dispatch_offset;
@@ -1133,27 +1192,27 @@ kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, unsigned trace_op
  */
                                if (thread->kernel_stack != 0) {
 #if defined(__LP64__)                                  
-                                       tracebytes = machine_trace_thread64(thread, tracepos, tracebound, nframes, FALSE);
+                                       tracebytes = machine_trace_thread64(thread, tracepos, tracebound, MAX_FRAMES, FALSE);
                                        tsnap->ss_flags |= kKernel64_p;
                                        framesize = 16;
 #else
-                                       tracebytes = machine_trace_thread(thread, tracepos, tracebound, nframes, FALSE);
+                                       tracebytes = machine_trace_thread(thread, tracepos, tracebound, MAX_FRAMES, FALSE);
                                        framesize = 8;
 #endif
                                }
                                tsnap->nkern_frames = tracebytes/framesize;
                                tracepos += tracebytes;
                                tracebytes = 0;
-/* Trace user stack, if any */
+                               /* Trace user stack, if any */
                                if (thread->task->map != kernel_map) {
                                        /* 64-bit task? */
                                        if (task_has_64BitAddr(thread->task)) {
-                                               tracebytes = machine_trace_thread64(thread, tracepos, tracebound, nframes, TRUE);
+                                               tracebytes = machine_trace_thread64(thread, tracepos, tracebound, MAX_FRAMES, TRUE);
                                                tsnap->ss_flags |= kUser64_p;
                                                framesize = 16;
                                        }
                                        else {
-                                               tracebytes = machine_trace_thread(thread, tracepos, tracebound, nframes, TRUE);
+                                               tracebytes = machine_trace_thread(thread, tracepos, tracebound, MAX_FRAMES, TRUE);
                                                framesize = 8;
                                        }
                                }
@@ -1161,6 +1220,7 @@ kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, unsigned trace_op
                                tracepos += tracebytes;
                                tracebytes = 0;
                        }
+               }
        }
 
 error_exit:
diff --git a/osfmk/kdp/kdp_dyld.h b/osfmk/kdp/kdp_dyld.h
new file mode 100644 (file)
index 0000000..ef22857
--- /dev/null
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*
+ * Data structure definitions copied from dyld so that we can read dyld's saved UUID information
+ * for each binary image not loaded from the shared cache during stackshots.
+ */
+
+/* From dyld/include/dyld_images.h */
+
+struct dyld_uuid_info {
+       user32_addr_t   imageLoadAddress;       /* base address image is mapped into */
+       uuid_t                  imageUUID;                      /* UUID of image */
+};
+
+struct dyld_uuid_info64 {
+       user64_addr_t   imageLoadAddress;       /* base address image is mapped into */
+       uuid_t                  imageUUID;                      /* UUID of image */
+};
+
+// FIXME: dyld is in C++, and some of the fields in dyld_all_image_infos are C++ 
+// native booleans.  There must be a better way...
+typedef uint8_t bool;
+
+struct dyld_all_image_infos {
+       uint32_t                                        version;
+       uint32_t                                        infoArrayCount;
+       user32_addr_t                           infoArray;
+       user32_addr_t                           notification;
+       bool                                            processDetachedFromSharedRegion;
+       bool                                            libSystemInitialized;
+       user32_addr_t                           dyldImageLoadAddress;
+       user32_addr_t                           jitInfo;
+       user32_addr_t                           dyldVersion;
+       user32_addr_t                           errorMessage;
+       user32_addr_t                           terminationFlags;
+       user32_addr_t                           coreSymbolicationShmPage;
+       user32_addr_t                           systemOrderFlag;
+       user32_size_t                           uuidArrayCount; // dyld defines this as a uintptr_t despite it being a count
+       user32_addr_t                           uuidArray;
+};
+
+struct dyld_all_image_infos64 {
+       uint32_t                                        version;
+       uint32_t                                        infoArrayCount;
+       user64_addr_t                           infoArray;
+       user64_addr_t                           notification;
+       bool                                            processDetachedFromSharedRegion;
+       bool                                            libSystemInitialized;
+       user64_addr_t                           dyldImageLoadAddress;
+       user64_addr_t                           jitInfo;
+       user64_addr_t                           dyldVersion;
+       user64_addr_t                           errorMessage;
+       user64_addr_t                           terminationFlags;
+       user64_addr_t                           coreSymbolicationShmPage;
+       user64_addr_t                           systemOrderFlag;
+       user64_size_t                           uuidArrayCount; // dyld defines this as a uintptr_t despite it being a count
+       user64_addr_t                           uuidArray;
+};
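
For comparison, the same dyld_all_image_infos structure can be located from userland (assuming macOS and its published task_info() interface) without any of the kdp_copyin() machinery; this only shows where the data the stackshot code copies actually lives:

        #include <stdio.h>
        #include <mach/mach.h>
        #include <mach-o/dyld_images.h>

        int
        main(void)
        {
                task_dyld_info_data_t info;
                mach_msg_type_number_t count = TASK_DYLD_INFO_COUNT;

                /* Ask the kernel where dyld published its image info. */
                if (task_info(mach_task_self(), TASK_DYLD_INFO,
                    (task_info_t)&info, &count) != KERN_SUCCESS)
                        return 1;

                const struct dyld_all_image_infos *aii =
                    (const struct dyld_all_image_infos *)info.all_image_info_addr;
                printf("dyld_all_image_infos version %u, %u images\n",
                    aii->version, aii->infoArrayCount);
                return 0;
        }
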
index 5cab18769328e1e3a7fcd19a74fafa08a1cc2878..0a54c5f2e22ee8f068eae6bea32a685ff6391093 100644 (file)
@@ -195,20 +195,21 @@ static unsigned stack_snapshot_bytes_traced = 0;
 static void *stack_snapshot_buf;
 static uint32_t stack_snapshot_bufsize;
 static int stack_snapshot_pid;
-static uint32_t stack_snapshot_options;
+static uint32_t stack_snapshot_flags;
+static uint32_t stack_snapshot_dispatch_offset;
 
 static unsigned int old_debugger;
 
 void
 kdp_snapshot_preflight(int pid, void * tracebuf, uint32_t tracebuf_size,
-    uint32_t options);
+    uint32_t flags, uint32_t dispatch_offset);
 
 void
 kdp_snapshot_postflight(void);
 
 extern int
 kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size,
-    unsigned trace_options, uint32_t *pbytesTraced);
+    uint32_t flags, uint32_t dispatch_offset, uint32_t *pbytesTraced);
 
 int
 kdp_stack_snapshot_geterror(void);
@@ -308,12 +309,13 @@ kdp_unregister_send_receive(
 
 /* Cache stack snapshot parameters in preparation for a trace */
 void
-kdp_snapshot_preflight(int pid, void * tracebuf, uint32_t tracebuf_size, uint32_t options)
+kdp_snapshot_preflight(int pid, void * tracebuf, uint32_t tracebuf_size, uint32_t flags, uint32_t dispatch_offset)
 {
        stack_snapshot_pid = pid;
        stack_snapshot_buf = tracebuf;
        stack_snapshot_bufsize = tracebuf_size;
-       stack_snapshot_options = options;
+       stack_snapshot_flags = flags;
+       stack_snapshot_dispatch_offset = dispatch_offset;
        kdp_snapshot++;
        /* Mark this debugger as active, since the polled mode driver that 
         * ordinarily does this may not be enabled (yet), or since KDB may be
@@ -1114,7 +1116,8 @@ kdp_raise_exception(
     if (kdp_snapshot && (!panic_active()) && (panic_caller == 0)) {
            stack_snapshot_ret = kdp_stackshot(stack_snapshot_pid,
            stack_snapshot_buf, stack_snapshot_bufsize,
-           stack_snapshot_options, &stack_snapshot_bytes_traced);
+           stack_snapshot_flags, stack_snapshot_dispatch_offset, 
+               &stack_snapshot_bytes_traced);
            return;
     }
 
index acec7297915a47085b89721801a997908b49c354..39aa1f425972716180df81c687d0f18a59145874 100644 (file)
@@ -431,16 +431,19 @@ extern const char version[];
 extern char osversion[];
 
 __private_extern__ void panic_display_system_configuration(void) {
-       static boolean_t config_displayed = FALSE;
+       static volatile boolean_t config_displayed = FALSE;
 
        panic_display_process_name();
        if (config_displayed == FALSE) {
+               config_displayed = TRUE;
                kdb_printf("\nMac OS version:\n%s\n",
                    (osversion[0] != 0) ? osversion : "Not yet set");
                kdb_printf("\nKernel version:\n%s\n",version);
                panic_display_model_name();
                panic_display_uptime();
-               config_displayed = TRUE;
+#if    defined(__i386__) || defined(__x86_64__)
+               pmap_pagetable_corruption_msg_log(&kdb_printf);
+#endif /* i386 || x86_64 */
                panic_display_zprint();
                kext_dump_panic_lists(&kdb_log);
        }
index d4ad172b9eb281f489d5fda3ffefd439834f3e8d..308435ece684501fc7a48d4f017dd3f6a63253fc 100644 (file)
@@ -39,12 +39,18 @@ struct thread_snapshot {
        uint32_t                snapshot_magic;
        uint32_t                nkern_frames;
        uint32_t                nuser_frames;
-       int32_t                 pid;
        uint64_t                wait_event;
        uint64_t                continuation;
        uint64_t                thread_id;
        int32_t                 state;
        char                    ss_flags;
+} __attribute__ ((packed));
+
+struct task_snapshot {
+       uint32_t                snapshot_magic;
+       int32_t                 pid;
+       uint32_t                nloadinfos;
+       char                    ss_flags;
        /* We restrict ourselves to a statically defined
         * (current as of 2009) length for the
         * p_comm string, due to scoping issues (osfmk/bsd and user/kernel
@@ -59,9 +65,13 @@ enum {
        kHasDispatchSerial = 0x4
 };
 
-enum   {STACKSHOT_GET_DQ = 1};
-#define STACKSHOT_DISPATCH_OFFSET_MASK 0xffff0000
-#define STACKSHOT_DISPATCH_OFFSET_SHIFT 16 
+enum {
+    STACKSHOT_GET_DQ = 0x1,
+    STACKSHOT_SAVE_LOADINFO = 0x2
+};
+
+#define STACKSHOT_THREAD_SNAPSHOT_MAGIC 0xfeedface
+#define STACKSHOT_TASK_SNAPSHOT_MAGIC 0xdecafbad
 
 #endif /* __APPLE_API_UNSTABLE */
 #endif /* __APPLE_API_PRIVATE */
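
The two magic values let a consumer re-frame the packed byte stream kdp_stackshot() emits: each task_snapshot is followed by its nloadinfos dyld UUID records, then by thread_snapshot records, each trailed by its raw stack frames. A hedged reader-side sketch; a real reader would also need the packed struct definitions above in scope:

        #include <stdint.h>
        #include <string.h>

        #define TASK_MAGIC    0xdecafbadu  /* STACKSHOT_TASK_SNAPSHOT_MAGIC */
        #define THREAD_MAGIC  0xfeedfaceu  /* STACKSHOT_THREAD_SNAPSHOT_MAGIC */

        /* Classify the record at pos; returns its magic, or 0 if framing is
         * lost.  A full reader would overlay the matching packed struct and
         * skip its trailing payload: nloadinfos UUID entries for a task;
         * nkern_frames + nuser_frames stack frames (8 or 16 bytes per frame,
         * per the kKernel64_p/kUser64_p bits in ss_flags) for a thread. */
        static uint32_t
        classify_record(const char *pos, const char *end)
        {
                uint32_t magic;

                if (pos + sizeof magic > end)
                        return 0;
                memcpy(&magic, pos, sizeof magic);  /* records are packed */

                if (magic == TASK_MAGIC || magic == THREAD_MAGIC)
                        return magic;
                return 0;
        }
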
@@ -70,6 +80,7 @@ enum  {STACKSHOT_GET_DQ = 1};
 
 extern unsigned int    systemLogDiags;
 extern char debug_buf[];
+extern unsigned int    debug_boot_arg;
 
 #ifdef MACH_KERNEL_PRIVATE
 
index 3410697249f5369eaedac9c4073475550f0fe061..ca65ceca63674b73db6d379aa313b2c54f25382e 100644 (file)
@@ -155,6 +155,15 @@ processor_init(
        processor_data_init(processor);
        processor->processor_list = NULL;
 
+       pset_lock(pset);
+       if (pset->cpu_set_count++ == 0)
+               pset->cpu_set_low = pset->cpu_set_hi = cpu_id;
+       else {
+               pset->cpu_set_low = (cpu_id < pset->cpu_set_low)? cpu_id: pset->cpu_set_low;
+               pset->cpu_set_hi = (cpu_id > pset->cpu_set_hi)? cpu_id: pset->cpu_set_hi;
+       }
+       pset_unlock(pset);
+
        simple_lock(&processor_list_lock);
        if (processor_list == NULL)
                processor_list = processor;
@@ -231,6 +240,8 @@ pset_init(
        queue_init(&pset->idle_queue);
        pset->processor_count = 0;
        pset->low_pri = pset->low_count = PROCESSOR_NULL;
+       pset->cpu_set_low = pset->cpu_set_hi = 0;
+       pset->cpu_set_count = 0;
        pset_lock_init(pset);
        pset->pset_self = IP_NULL;
        pset->pset_name_self = IP_NULL;
index fcf61d0444612d7e04fa4dbfc64d74bbd1c461f5..342a90081ef53999391ce7e7be8c6c7120c03944 100644 (file)
@@ -89,6 +89,9 @@ struct processor_set {
 
        int                                     processor_count;
 
+       int                                     cpu_set_low, cpu_set_hi;
+       int                                     cpu_set_count;
+
        decl_simple_lock_data(,sched_lock)      /* lock for above */
 
        struct ipc_port *       pset_self;              /* port for operations */
@@ -244,11 +247,15 @@ extern kern_return_t      processor_info_count(
 #define pset_deallocate(x)
 #define pset_reference(x)
 
-extern void                    machine_run_count(
-                                               uint32_t        count);
+extern void                            machine_run_count(
+                                                       uint32_t        count);
+
+extern boolean_t               machine_processor_is_inactive(
+                                                       processor_t                     processor);
 
-extern boolean_t       machine_cpu_is_inactive(
-                                               int                     cpu_id);
+extern processor_t             machine_choose_processor(
+                                                       processor_set_t         pset,
+                                                       processor_t                     processor);
 
 #else  /* MACH_KERNEL_PRIVATE */
 
index e1e5ae4c0272b9ee64f4393287f731377ef2e4d0..9a153ea2921bf391b285b6a9e20f32e9e0987653 100644 (file)
 #define BASEPRI_FOREGROUND     (BASEPRI_DEFAULT + 16)                          /* 47 */
 #define BASEPRI_BACKGROUND     (BASEPRI_DEFAULT + 15)                          /* 46 */
 #define BASEPRI_DEFAULT                (MAXPRI_USER - (NRQS / 4))                      /* 31 */
+#define MAXPRI_THROTTLE                (MINPRI + 4)                                            /*  4 */
 #define MINPRI_USER                    MINPRI                                                          /*  0 */
 
 /*
index 2dc656aecaf6d703eed9d737fcb99f6f72ff9bef..60191650c365406ed305e2448ae30bb843331f03 100644 (file)
@@ -1104,7 +1104,7 @@ thread_select(
 
                pset_lock(pset);
 
-               inactive_state = processor->state != PROCESSOR_SHUTDOWN && machine_cpu_is_inactive(processor->cpu_id);
+               inactive_state = processor->state != PROCESSOR_SHUTDOWN && machine_processor_is_inactive(processor);
 
                simple_lock(&rt_lock);
 
@@ -1680,8 +1680,7 @@ thread_dispatch(
                                        thread->realtime.deadline = UINT64_MAX;
                                        thread->reason |= AST_QUANTUM;
                                }
-                       }
-                       else {
+                       } else {
                                /*
                                 *      For non-realtime threads treat a tiny
                                 *      remaining quantum as an expired quantum
@@ -1726,12 +1725,25 @@ thread_dispatch(
                                /*
                                 *      Waiting.
                                 */
+                               boolean_t should_terminate = FALSE;
+
+                               /* Only the first call to thread_dispatch
+                                * after explicit termination should add
+                                * the thread to the termination queue
+                                */
+                               if ((thread->state & (TH_TERMINATE|TH_TERMINATE2)) == TH_TERMINATE) {
+                                       should_terminate = TRUE;
+                                       thread->state |= TH_TERMINATE2;
+                               }
+
                                thread->state &= ~TH_RUN;
 
                                if (thread->sched_mode & TH_MODE_TIMESHARE)
                                        sched_share_decr();
                                sched_run_decr();
 
+                               (*thread->sched_call)(SCHED_CALL_BLOCK, thread);
+
                                if (thread->wake_active) {
                                        thread->wake_active = FALSE;
                                        thread_unlock(thread);
@@ -1743,9 +1755,7 @@ thread_dispatch(
 
                                wake_unlock(thread);
 
-                               (*thread->sched_call)(SCHED_CALL_BLOCK, thread);
-
-                               if (thread->state & TH_TERMINATE)
+                               if (should_terminate)
                                        thread_terminate_enqueue(thread);
                        }
                }
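
The TH_TERMINATE/TH_TERMINATE2 change above is a once-only latch: the masked compare is true only when TERMINATE is set and TERMINATE2 is not yet, and setting TERMINATE2 inside the same critical section guarantees every later pass through thread_dispatch() sees the latch closed. The generic form of the idiom (hypothetical flag names; assumes the caller serializes access to state, as the thread lock does above):

        #define F_REQUESTED     0x10
        #define F_HANDLED       0x20

        static int
        latch_once(unsigned int *state)
        {
                /* True only for "requested but not yet handled". */
                if ((*state & (F_REQUESTED | F_HANDLED)) == F_REQUESTED) {
                        *state |= F_HANDLED;    /* close the latch */
                        return 1;               /* first (and only) hit */
                }
                return 0;
        }
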
@@ -2232,6 +2242,7 @@ choose_next_pset(
  *     choose_processor:
  *
  *     Choose a processor for the thread, beginning at
+ *     the pset.  Accepts an optional processor hint in
  *     the pset.
  *
  *     Returns a processor, possibly from a different pset.
@@ -2242,19 +2253,25 @@ choose_next_pset(
 static processor_t
 choose_processor(
        processor_set_t         pset,
+       processor_t                     processor,
        thread_t                        thread)
 {
        processor_set_t         nset, cset = pset;
-       processor_t                     processor = thread->last_processor;
        processor_meta_t        pmeta = PROCESSOR_META_NULL;
 
        /*
-        *      Prefer the last processor, when appropriate.
+        *      Prefer the hinted processor, when appropriate.
         */
        if (processor != PROCESSOR_NULL) {
+               processor_t                     mprocessor;
+
                if (processor->processor_meta != PROCESSOR_META_NULL)
                        processor = processor->processor_meta->primary;
 
+               mprocessor = machine_choose_processor(pset, processor);
+               if (mprocessor != PROCESSOR_NULL)
+                       processor = mprocessor;
+
                if (processor->processor_set != pset || processor->state == PROCESSOR_INACTIVE ||
                                processor->state == PROCESSOR_SHUTDOWN || processor->state == PROCESSOR_OFF_LINE)
                        processor = PROCESSOR_NULL;
@@ -2262,6 +2279,18 @@ choose_processor(
                if (processor->state == PROCESSOR_IDLE)
                        return (processor);
        }
+       else {
+               processor = machine_choose_processor(pset, processor);
+
+               if (processor != PROCESSOR_NULL) {
+                       if (processor->processor_set != pset || processor->state == PROCESSOR_INACTIVE ||
+                                       processor->state == PROCESSOR_SHUTDOWN || processor->state == PROCESSOR_OFF_LINE)
+                               processor = PROCESSOR_NULL;
+                       else
+                               if (processor->state == PROCESSOR_IDLE)
+                                       return (processor);
+               }
+       }
 
        /*
         *      Iterate through the processor sets to locate
@@ -2447,7 +2476,7 @@ thread_setrun(
                        pset = thread->affinity_set->aset_pset;
                        pset_lock(pset);
 
-                       processor = choose_processor(pset, thread);
+                       processor = choose_processor(pset, PROCESSOR_NULL, thread);
                }
                else
                if (thread->last_processor != PROCESSOR_NULL) {
@@ -2468,10 +2497,10 @@ thread_setrun(
                                 */
                                if (thread->sched_pri <= processor->current_pri ||
                                                thread->realtime.deadline >= processor->deadline)
-                                       processor = choose_processor(pset, thread);
+                                       processor = choose_processor(pset, PROCESSOR_NULL, thread);
                        }
                        else
-                               processor = choose_processor(pset, thread);
+                               processor = choose_processor(pset, processor, thread);
                }
                else {
                        /*
@@ -2489,7 +2518,7 @@ thread_setrun(
                        pset = choose_next_pset(pset);
                        pset_lock(pset);
 
-                       processor = choose_processor(pset, thread);
+                       processor = choose_processor(pset, PROCESSOR_NULL, thread);
                        task->pset_hint = processor->processor_set;
                }
        }
@@ -2645,7 +2674,7 @@ csw_check(
                                processor->processor_meta->primary != processor)
                return (AST_PREEMPT);
 
-       if (machine_cpu_is_inactive(processor->cpu_id))
+       if (machine_processor_is_inactive(processor))
                return (AST_PREEMPT);
 
        if (processor->active_thread->state & TH_SUSP)
@@ -2925,7 +2954,7 @@ processor_idle(
 
                (void)splsched();
 
-               if (processor->state == PROCESSOR_INACTIVE && !machine_cpu_is_inactive(processor->cpu_id))
+               if (processor->state == PROCESSOR_INACTIVE && !machine_processor_is_inactive(processor))
                        break;
        }
 
index d47b67c528839f23f6467715fc9400d3ceebfc94..9f1c953473eeb0ed74890e5dfcf3b03bf15d81d3 100644 (file)
@@ -159,9 +159,6 @@ extern void         idle_thread(void);
 extern kern_return_t   idle_thread_create(
                                                        processor_t             processor);
 
-/* Start thread running */
-extern void            thread_bootstrap_return(void);
-
 /* Continuation return from syscall */
 extern void     thread_syscall_return(
                         kern_return_t   ret);
@@ -225,6 +222,9 @@ extern kern_return_t clear_wait(
                                                thread_t                thread,
                                                wait_result_t   result);
 
+/* Start thread running */
+extern void            thread_bootstrap_return(void);
+
 /* Return from exception (BSD-visible interface) */
 extern void            thread_exception_return(void) __dead2;
 
index 0f027820d988feb6fec35b2aff12ca9e53b02e0f..d3395ddb452030978fe6c4a3fa73751d5ff679e2 100644 (file)
@@ -105,6 +105,16 @@ task_policy_set(
                                task->role = info->role;
                        }
                }
+               else
+               if (info->role == TASK_THROTTLE_APPLICATION) {
+                       task_priority(task, MAXPRI_THROTTLE, MAXPRI_THROTTLE);
+                       task->role = info->role;
+               }
+               else
+               if (info->role == TASK_DEFAULT_APPLICATION) {
+                       task_priority(task, BASEPRI_DEFAULT, MAXPRI_USER);
+                       task->role = info->role;
+               }
                else
                        result = KERN_INVALID_ARGUMENT;
 
index b33a7d2be63d7e44cd6ef21b0380b246a0de7085..581a37c7ffa9637dbf7b685b80efbf8dd98a65d8 100644 (file)
@@ -822,6 +822,7 @@ thread_create_running(
 kern_return_t
 thread_create_workq(
        task_t                          task,
+       thread_continue_t               thread_return,
        thread_t                        *new_thread)
 {
        kern_return_t           result;
@@ -830,8 +831,7 @@ thread_create_workq(
        if (task == TASK_NULL || task == kernel_task)
                return (KERN_INVALID_ARGUMENT);
 
-       result = thread_create_internal(task, -1, (thread_continue_t)thread_bootstrap_return,
-                                                                                                       TH_OPTION_NOCRED | TH_OPTION_NOSUSP, &thread);
+       result = thread_create_internal(task, -1, thread_return, TH_OPTION_NOCRED | TH_OPTION_NOSUSP, &thread);
        if (result != KERN_SUCCESS)
                return (result);
 
index 61217f52e283a06d5c154db33a2d6e73e0df218a..db2c6e3520085375b9db019684a92f960cdda985 100644 (file)
@@ -178,6 +178,7 @@ struct thread {
 #define TH_RUN                 0x04                    /* running or on runq */
 #define TH_UNINT               0x08                    /* waiting uninterruptibly */
 #define        TH_TERMINATE    0x10                    /* halted at termination */
+#define        TH_TERMINATE2   0x20                    /* added to termination queue */
 
 #define TH_IDLE                        0x80                    /* idling processor */
 
@@ -640,6 +641,7 @@ __BEGIN_DECLS
 
 extern kern_return_t   thread_create_workq(
                                                        task_t                  task,
+                                                       thread_continue_t       thread_return,
                                                        thread_t                *new_thread);
 
 extern void    thread_yield_internal(
index ab9bab486a5f237e4346e20768bd78ae906aa023..92f0b642b9d15503020e79d1846da479ef3be081 100644 (file)
@@ -59,6 +59,7 @@ struct thread_call_group {
        timer_call_data_t       delayed_timer;
 
        struct wait_queue       idle_wqueue;
+       struct wait_queue       daemon_wqueue;
        uint32_t                        idle_count, active_count;
 };
 
@@ -149,6 +150,7 @@ thread_call_initialize(void)
        timer_call_setup(&group->delayed_timer, thread_call_delayed_timer, group);
 
        wait_queue_init(&group->idle_wqueue, SYNC_POLICY_FIFO);
+       wait_queue_init(&group->daemon_wqueue, SYNC_POLICY_FIFO);
 
     queue_init(&thread_call_internal_queue);
     for (
@@ -772,7 +774,7 @@ thread_call_wake(
        else
        if (!thread_call_daemon_awake) {
                thread_call_daemon_awake = TRUE;
-               thread_wakeup_one(&thread_call_daemon_awake);
+               wait_queue_wakeup_one(&group->daemon_wqueue, NULL, THREAD_AWAKENED);
        }
 }
 
@@ -901,8 +903,8 @@ thread_call_daemon_continue(
                simple_lock(&thread_call_lock);
     }
 
-       thread_call_daemon_awake = FALSE;
-    assert_wait(&thread_call_daemon_awake, THREAD_UNINT);
+    thread_call_daemon_awake = FALSE;
+    wait_queue_assert_wait(&group->daemon_wqueue, NULL, THREAD_UNINT, 0);
     
     simple_unlock(&thread_call_lock);
        (void) spllo();
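Annotation: the hunks above replace a wakeup on a global event with a per-group wait queue. A loose userland analogue of the assert_wait/wakeup_one pairing, using a condition variable (names hypothetical):

    #include <pthread.h>
    #include <stdbool.h>

    /* Userland analogue: one flag plus a per-group condition variable
     * standing in for daemon_wqueue. */
    struct group {
        pthread_mutex_t lock;
        pthread_cond_t  daemon_wq;
        bool            daemon_awake;
    };

    struct group g = { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, false };

    void group_wake(struct group *grp)              /* cf. thread_call_wake() */
    {
        pthread_mutex_lock(&grp->lock);
        if (!grp->daemon_awake) {
            grp->daemon_awake = true;
            pthread_cond_signal(&grp->daemon_wq);   /* wait_queue_wakeup_one */
        }
        pthread_mutex_unlock(&grp->lock);
    }

    void daemon_wait(struct group *grp)             /* cf. daemon_continue() */
    {
        pthread_mutex_lock(&grp->lock);
        grp->daemon_awake = false;
        while (!grp->daemon_awake)
            pthread_cond_wait(&grp->daemon_wq, &grp->lock);  /* assert_wait + block */
        pthread_mutex_unlock(&grp->lock);
    }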
index a07f4c98fb61e049d0865247ec2a073a08ee7598..3a2fb39c4415b37ea5df7409adc6e976d60dee37 100644 (file)
@@ -109,7 +109,9 @@ enum task_role {
        TASK_FOREGROUND_APPLICATION,
        TASK_BACKGROUND_APPLICATION,
        TASK_CONTROL_APPLICATION,
-       TASK_GRAPHICS_SERVER
+       TASK_GRAPHICS_SERVER,
+       TASK_THROTTLE_APPLICATION,
+       TASK_DEFAULT_APPLICATION
 };
 
 typedef enum task_role         task_role_t;
index eed5107711edad8a31ddd9dd63362f387e01c416..6fe17d43cc53a69e4e9d09b033db062bfcf9bba9 100644 (file)
@@ -130,4 +130,12 @@ typedef int                vm_prot_t;
 #define VM_PROT_WANTS_COPY     ((vm_prot_t) 0x10)
 
 
+/*
+ *     The caller wants this memory region treated as if it had a valid
+ *     code signature.
+ */
+
+#define VM_PROT_TRUSTED                ((vm_prot_t) 0x20)
+
+
 #endif /* _MACH_VM_PROT_H_ */
index 7edacae01318a0e841cb00b1f1b44990d396642e..bc79f0c7c70fe66ec96180e9ee366a5a23b23456 100644 (file)
@@ -820,11 +820,17 @@ machine_run_count(__unused uint32_t count)
 }
 
 boolean_t
-machine_cpu_is_inactive(__unused int num)
+machine_processor_is_inactive(__unused processor_t processor)
 {
     return(FALSE);
 }
 
+processor_t
+machine_choose_processor(__unused processor_set_t pset, processor_t processor)
+{
+    return (processor);
+}
+
 vm_offset_t ml_stack_remaining(void)
 {
        uintptr_t local = (uintptr_t) &local;
index cc652d4f8270f7d3aaa30a65cb3d663cdb4ac1aa..b339dbd7d515bdeac804da4b0068e2cc44981242 100644 (file)
@@ -2351,7 +2351,7 @@ vm_fault_enter(vm_page_t m,
                        /* Page might have been tainted before or not; now it
                         * definitively is. If the page wasn't tainted, we must
                         * disconnect it from all pmaps later. */
-                       must_disconnect = ~m->cs_tainted;
+                       must_disconnect = !m->cs_tainted;
                        m->cs_tainted = TRUE;
                        cs_enter_tainted_accepted++;
                }
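Annotation: the one-character change above is a real bug fix. cs_tainted is a one-bit flag, and bitwise NOT of a 0/1 value is never zero, so must_disconnect was previously always true. A standalone illustration:

    #include <stdio.h>

    struct page { unsigned cs_tainted : 1; };

    int main(void)
    {
        struct page m = { .cs_tainted = 1 };

        /* Bitwise NOT of a 0/1 value yields -2 or -1 -- never zero -- so
         * the old code always requested a disconnect.  Logical NOT is
         * what the surrounding logic intends. */
        printf("~tainted = %d (still truthy)\n", ~m.cs_tainted);
        printf("!tainted = %d\n", !m.cs_tainted);
        return 0;
    }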
index d48a044fa4a718c21776706856bdc25616c86fb7..a0f5e8c9b07fc7a2a75eb431f1098c2a1b55425a 100644 (file)
@@ -4704,6 +4704,8 @@ vm_map_submap_pmap_clean(
 
        submap_end = offset + (end - start);
        submap_start = offset;
+
+       vm_map_lock_read(sub_map);
        if(vm_map_lookup_entry(sub_map, offset, &entry)) {
                
                remove_size = (entry->vme_end - entry->vme_start);
@@ -4775,7 +4777,8 @@ vm_map_submap_pmap_clean(
                        }
                }
                entry = entry->vme_next;
-       } 
+       }
+       vm_map_unlock_read(sub_map);
        return;
 }
 
@@ -12547,3 +12550,95 @@ void vm_map_switch_protect(vm_map_t    map,
        map->switch_protect=val;
        vm_map_unlock(map);
 }
+
+/* Add (generate) code signature for memory range */
+#if CONFIG_DYNAMIC_CODE_SIGNING
+kern_return_t vm_map_sign(vm_map_t map, 
+                vm_map_offset_t start, 
+                vm_map_offset_t end)
+{
+       vm_map_entry_t entry;
+       vm_page_t m;
+       vm_object_t object;
+       
+       /*
+        * Vet all the input parameters and current type and state of the
+        * underlying object.  Return with an error if anything is amiss.
+        */
+       if (map == VM_MAP_NULL)
+               return(KERN_INVALID_ARGUMENT);
+               
+       vm_map_lock_read(map);
+       
+       if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
+               /*
+                * Must pass a valid non-submap address.
+                */
+               vm_map_unlock_read(map);
+               return(KERN_INVALID_ADDRESS);
+       }
+       
+       if((entry->vme_start > start) || (entry->vme_end < end)) {
+               /*
+                * Map entry doesn't cover the requested range. Not handling
+                * this situation currently.
+                */
+               vm_map_unlock_read(map);
+               return(KERN_INVALID_ARGUMENT);
+       }
+       
+       object = entry->object.vm_object;
+       if (object == VM_OBJECT_NULL) {
+               /*
+                * Object must already be present or we can't sign.
+                */
+               vm_map_unlock_read(map);
+               return KERN_INVALID_ARGUMENT;
+       }
+       
+       vm_object_lock(object);
+       vm_map_unlock_read(map);
+       
+       while(start < end) {
+               uint32_t refmod;
+               
+               m = vm_page_lookup(object, start - entry->vme_start + entry->offset );
+               if (m==VM_PAGE_NULL) {
+                       /* should we try to fault a page here? we can probably
+                        * demand it exists and is locked for this request */
+                       vm_object_unlock(object);
+                       return KERN_FAILURE;
+               }
+               /* deal with special page status */
+               if (m->busy || 
+                   (m->unusual && (m->error || m->restart || m->private || m->absent))) {
+                       vm_object_unlock(object);
+                       return KERN_FAILURE;
+               }
+               
+               /* Page is OK... now "validate" it */
+               /* This is the place where we'll call out to create a code 
+                * directory, later */
+               m->cs_validated = TRUE;
+
+               /* The page is now "clean" for codesigning purposes. That means
+                * we don't consider it as modified (wpmapped) anymore. But 
+                * we'll disconnect the page so we note any future modification
+                * attempts. */
+               m->wpmapped = FALSE;
+               refmod = pmap_disconnect(m->phys_page);
+               
+               /* Pull the dirty status from the pmap, since we cleared the 
+                * wpmapped bit */
+               if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
+                       m->dirty = TRUE;
+               }
+               
+               /* On to the next page */
+               start += PAGE_SIZE;
+       }
+       vm_object_unlock(object);
+       
+       return KERN_SUCCESS;
+}
+#endif
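Annotation: a hypothetical in-kernel caller (e.g. a dynamic code-generation path) would hand vm_map_sign() page-aligned bounds, since the loop above walks the range in PAGE_SIZE steps. A sketch, assuming the usual page-rounding helpers and omitting policy checks:

    #if CONFIG_DYNAMIC_CODE_SIGNING
    /* Hypothetical caller: mark freshly generated code in the current
     * map as validated. */
    kern_return_t
    sign_generated_code(vm_map_offset_t start, vm_map_size_t len)
    {
        return vm_map_sign(current_map(),
                           vm_map_trunc_page(start),
                           vm_map_round_page(start + len));
    }
    #endif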
index f520087ed448f12248465069285d37388a62bfd6..09eaa747306159c2f0da4f21677cdbc947aa6d53 100644 (file)
@@ -1024,6 +1024,12 @@ extern kern_return_t vm_map_get_upl(
                                int             *flags,
                                int             force_data_sync);
 
+#if CONFIG_DYNAMIC_CODE_SIGNING
+extern kern_return_t vm_map_sign(vm_map_t map, 
+                                vm_map_offset_t start, 
+                                vm_map_offset_t end);
+#endif
+
 __END_DECLS
 
 #endif /* KERNEL_PRIVATE */
index 5ad70b32332065d0c911c3df05c6d64c9912e999..e8a1605a75957a0fdb12d28d7f0c595d7f2894e6 100644 (file)
@@ -663,14 +663,15 @@ copyio(int copy_type, user_addr_t user_addr, char *kernel_addr,
 
         pmap = thread->map->pmap;
 
+
+       assert((vm_offset_t)kernel_addr >= VM_MIN_KERNEL_AND_KEXT_ADDRESS ||
+              copy_type == COPYINPHYS || copy_type == COPYOUTPHYS);
+
        /* Sanity and security check for addresses to/from a user */
-       if ((copy_type == COPYIN ||
-            copy_type == COPYINSTR ||
-            copy_type == COPYOUT) &&
-           (pmap != kernel_pmap) &&
-           ((vm_offset_t)kernel_addr < VM_MIN_KERNEL_AND_KEXT_ADDRESS ||
-            !IS_USERADDR64_CANONICAL(user_addr))) {
-               error = EACCES;
+
+       if (((pmap != kernel_pmap) && (use_kernel_map == 0)) &&
+           ((nbytes && (user_addr+nbytes <= user_addr)) || ((user_addr + nbytes) > vm_map_max(thread->map)))) {
+               error = EFAULT;
                goto out;
        }
 
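Annotation: the replacement test folds two failure modes into one expression — a byte count that wraps the 64-bit address space, and a range that ends beyond the user map. Restated in isolation:

    #include <stdbool.h>
    #include <stdint.h>

    /* Restates the new copyio() range check; map_max stands in for
     * vm_map_max(thread->map). */
    static bool
    user_range_invalid(uint64_t user_addr, uint64_t nbytes, uint64_t map_max)
    {
        return (nbytes && (user_addr + nbytes <= user_addr)) ||  /* wraps */
               ((user_addr + nbytes) > map_max);                 /* past end */
    }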
index 13c439a96e85d25404bc5f27619658b2a879a15e..e53843224c62381ee851207931add9ea17adb1f6 100644 (file)
@@ -90,7 +90,6 @@
  */
 
 #include <string.h>
-#include <norma_vm.h>
 #include <mach_kdb.h>
 #include <mach_ldebug.h>
 
 #include <i386/mp_desc.h>
 
 
-/* #define DEBUGINTERRUPTS 1  uncomment to ensure pmap callers have interrupts enabled */
-#ifdef DEBUGINTERRUPTS
-#define pmap_intr_assert() {                                                   \
-       if (processor_avail_count > 1 && !ml_get_interrupts_enabled())          \
-               panic("pmap interrupt assert %s, %d",__FILE__, __LINE__);       \
-}
-#else
-#define pmap_intr_assert()
-#endif
 
 #ifdef IWANTTODEBUG
 #undef DEBUG
@@ -178,11 +168,6 @@ boolean_t  no_shared_cr3 = DEBUG;          /* TRUE for DEBUG by default */
  * Forward declarations for internal functions.
  */
 
-void           pmap_remove_range(
-                       pmap_t          pmap,
-                       vm_map_offset_t va,
-                       pt_entry_t      *spte,
-                       pt_entry_t      *epte);
 
 void           phys_attribute_clear(
                        ppnum_t         phys,
@@ -209,166 +194,12 @@ int allow_stack_exec = 0;                /* No apps may execute from the stack by default */
 
 const boolean_t cpu_64bit  = TRUE; /* Mais oui! */
 
-/*
- * when spinning through pmap_remove
- * ensure that we don't spend too much
- * time with preemption disabled.
- * I'm setting the current threshold
- * to 20us
- */
-#define MAX_PREEMPTION_LATENCY_NS 20000
-
 uint64_t max_preemption_latency_tsc = 0;
 
-
-/*
- *     Private data structures.
- */
-
-/*
- *     For each vm_page_t, there is a list of all currently
- *     valid virtual mappings of that page.  An entry is
- *     a pv_rooted_entry_t; the list is the pv_table.
- *
- *      N.B.  with the new combo rooted/hashed scheme it is
- *      only possible to remove individual non-rooted entries
- *      if they are found via the hashed chains as there is no
- *      way to unlink the singly linked hashed entries if navigated to
- *      via the queue list off the rooted entries.  Think of it as
- *      hash/walk/pull, keeping track of the prev pointer while walking
- *      the singly linked hash list.  All of this is to save memory and
- *      keep both types of pv_entries as small as possible.
- */
-
-/*
-
-PV HASHING Changes - JK 1/2007
-
-Pve's establish physical to virtual mappings.  These are used for aliasing of a 
-physical page to (potentially many) virtual addresses within pmaps. In the
-previous implementation the structure of the pv_entries (each 16 bytes in size) was
-
-typedef struct pv_entry {
-    struct pv_entry_t    next;
-    pmap_t                    pmap;
-    vm_map_offset_t   va;
-} *pv_entry_t;
-
-An initial array of these is created at boot time, one per physical page of
-memory, indexed by the physical page number. Additionally, a pool of entries
-is created from a pv_zone to be used as needed by pmap_enter() when it is
-creating new mappings.  Originally, we kept this pool around because the code
-in pmap_enter() was unable to block if it needed an entry and none were
-available - we'd panic.  Some time ago I restructured the pmap_enter() code
-so that for user pmaps it can block while zalloc'ing a pv structure and restart,
-removing a panic from the code (in the case of the kernel pmap we cannot block
-and still panic, so, we keep a separate hot pool for use only on kernel pmaps).
-The pool has not been removed since there is a large performance gain keeping
-freed pv's around for reuse and not suffering the overhead of zalloc for every
-new pv we need.
-
-As pmap_enter() created new mappings it linked the new pve's for them off the
-fixed pv array for that ppn (off the next pointer).  These pve's are accessed
-for several operations, one of them being address space teardown. In that case,
-we basically do this
-
-       for (every page/pte in the space) {
-               calc pve_ptr from the ppn in the pte
-               for (every pv in the list for the ppn) {
-                       if (this pv is for this pmap/vaddr) {
-                               do housekeeping
-                               unlink/free the pv
-                       }
-               }
-       }
-
-The problem arose when we were running, say 8000 (or even 2000) apache or
-other processes and one or all terminate. The list hanging off each pv array
-entry could have thousands of entries.  We were continuously linearly searching
-each of these lists as we stepped through the address space we were tearing
-down.  Because of the locks we hold, likely taking a cache miss for each node,
-and interrupt disabling for MP issues the system became completely unresponsive
-for many seconds while we did this.
-
-Realizing that pve's are accessed in two distinct ways (linearly running the
-list by ppn for operations like pmap_page_protect and finding and
-modifying/removing a single pve as part of pmap_enter processing) has led to
-modifying the pve structures and databases.
-
-There are now two types of pve structures.  A "rooted" structure which is
-basically the original structure accessed in an array by ppn, and a "hashed"
-structure accessed on a hash list via a hash of [pmap, vaddr]. These have been
-designed with the two goals of minimizing wired memory and making the lookup of
-a ppn faster.  Since a vast majority of pages in the system are not aliased
-and hence represented by a single pv entry I've kept the rooted entry size as
-small as possible because there is one of these dedicated for every physical
-page of memory.  The hashed pve's are larger due to the addition of the hash
-link and the ppn entry needed for matching while running the hash list to find
-the entry we are looking for.  This way, only systems that have lots of
-aliasing (like 2000+ httpd procs) will pay the extra memory price. Both
-structures have the same first three fields allowing some simplification in
-the code.
-
-They have these shapes
-
-typedef struct pv_rooted_entry {
-       queue_head_t            qlink;
-        vm_map_offset_t                va;
-       pmap_t                  pmap;
-} *pv_rooted_entry_t;
-
-
-typedef struct pv_hashed_entry {
-       queue_head_t            qlink;
-       vm_map_offset_t         va;
-       pmap_t                  pmap;
-       ppnum_t                 ppn;
-       struct pv_hashed_entry *nexth;
-} *pv_hashed_entry_t;
-
-The main flow difference is that the code is now aware of the rooted entry and
-the hashed entries.  Code that runs the pv list still starts with the rooted
-entry and then continues down the qlink onto the hashed entries.  Code that is
-looking up a specific pv entry first checks the rooted entry and then hashes
-and runs the hash list for the match. The hash list lengths are much smaller
-than the original pv lists that contained all aliases for the specific ppn.
-
-*/
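Annotation: the "hash/walk/pull" unlink described above — walk the singly linked chain while tracking the address of the previous link, then splice the node out — looks like this in a minimal userland sketch (names hypothetical):

    #include <assert.h>
    #include <stddef.h>

    struct hashed { struct hashed *nexth; };

    /* Walk the chain anchored at *anchor, keeping the address of the
     * previous link, and splice node out.  Mirrors pmap_pvh_unlink(). */
    static void
    chain_unlink(struct hashed **anchor, struct hashed *node)
    {
        struct hashed **pprev = anchor;
        struct hashed  *cur   = *anchor;

        while (cur != NULL && cur != node) {
            pprev = &cur->nexth;
            cur   = cur->nexth;
        }
        assert(cur == node);    /* the kernel version panics instead */
        *pprev = node->nexth;
    }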
-
-typedef struct pv_rooted_entry {
-       /* first three entries must match pv_hashed_entry_t */
-        queue_head_t           qlink;
-       vm_map_offset_t         va;     /* virtual address for mapping */
-       pmap_t                  pmap;   /* pmap where mapping lies */
-} *pv_rooted_entry_t;
-
-#define PV_ROOTED_ENTRY_NULL   ((pv_rooted_entry_t) 0)
-
-pv_rooted_entry_t      pv_head_table;          /* array of entries, one per page */
-
-typedef struct pv_hashed_entry {
-       /* first three entries must match pv_rooted_entry_t */
-       queue_head_t            qlink;
-       vm_map_offset_t         va;
-       pmap_t                  pmap;
-       ppnum_t                 ppn;
-       struct pv_hashed_entry  *nexth;
-} *pv_hashed_entry_t;
-
-#define PV_HASHED_ENTRY_NULL ((pv_hashed_entry_t)0)
-
-#define NPVHASH 4095   /* MUST BE 2^N - 1 */
 pv_hashed_entry_t     *pv_hash_table;  /* hash lists */
 
 uint32_t npvhash = 0;
 
-//#define PV_DEBUG 1   /* uncomment to enable some PV debugging code */
-#ifdef PV_DEBUG
-#define CHK_NPVHASH() if(0 == npvhash) panic("npvhash uninitialized");
-#else
-#define CHK_NPVHASH(x)
-#endif
-
 pv_hashed_entry_t      pv_hashed_free_list = PV_HASHED_ENTRY_NULL;
 pv_hashed_entry_t      pv_hashed_kern_free_list = PV_HASHED_ENTRY_NULL;
 decl_simple_lock_data(,pv_hashed_free_list_lock)
@@ -377,53 +208,7 @@ decl_simple_lock_data(,pv_hash_table_lock)
 
 int                    pv_hashed_free_count = 0;
 int                    pv_hashed_kern_free_count = 0;
-#define PV_HASHED_LOW_WATER_MARK 5000
-#define PV_HASHED_KERN_LOW_WATER_MARK 100
-#define PV_HASHED_ALLOC_CHUNK 2000
-#define PV_HASHED_KERN_ALLOC_CHUNK 50
-thread_call_t          mapping_adjust_call;
-static thread_call_data_t mapping_adjust_call_data;
-uint32_t               mappingrecurse = 0;
-
-#define        PV_HASHED_ALLOC(pvh_e) {                                        \
-       simple_lock(&pv_hashed_free_list_lock);                         \
-       if ((pvh_e = pv_hashed_free_list) != 0) {                       \
-         pv_hashed_free_list = (pv_hashed_entry_t)pvh_e->qlink.next;   \
-          pv_hashed_free_count--;                                      \
-          if (pv_hashed_free_count < PV_HASHED_LOW_WATER_MARK)         \
-            if (hw_compare_and_store(0,1,(u_int *)&mappingrecurse))    \
-              thread_call_enter(mapping_adjust_call);                  \
-       }                                                               \
-       simple_unlock(&pv_hashed_free_list_lock);                       \
-}
-
-#define        PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt) {                   \
-       simple_lock(&pv_hashed_free_list_lock);                         \
-       pvh_et->qlink.next = (queue_entry_t)pv_hashed_free_list;        \
-       pv_hashed_free_list = pvh_eh;                                   \
-        pv_hashed_free_count += pv_cnt;                                        \
-       simple_unlock(&pv_hashed_free_list_lock);                       \
-}
-
-#define        PV_HASHED_KERN_ALLOC(pvh_e) {                                   \
-       simple_lock(&pv_hashed_kern_free_list_lock);                    \
-       if ((pvh_e = pv_hashed_kern_free_list) != 0) {                  \
-         pv_hashed_kern_free_list = (pv_hashed_entry_t)pvh_e->qlink.next; \
-          pv_hashed_kern_free_count--;                                 \
-          if (pv_hashed_kern_free_count < PV_HASHED_KERN_LOW_WATER_MARK)\
-            if (hw_compare_and_store(0,1,(u_int *)&mappingrecurse))    \
-              thread_call_enter(mapping_adjust_call);                  \
-       }                                                               \
-       simple_unlock(&pv_hashed_kern_free_list_lock);                  \
-}
 
-#define        PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt) {              \
-       simple_lock(&pv_hashed_kern_free_list_lock);                    \
-       pvh_et->qlink.next = (queue_entry_t)pv_hashed_kern_free_list;   \
-       pv_hashed_kern_free_list = pvh_eh;                              \
-        pv_hashed_kern_free_count += pv_cnt;                           \
-       simple_unlock(&pv_hashed_kern_free_list_lock);                  \
-}
 
 zone_t         pv_hashed_list_zone;    /* zone of pv_hashed_entry structures */
 
@@ -436,10 +221,10 @@ static zone_t pdpt_zone;
  */
 
 char   *pv_lock_table;         /* pointer to array of bits */
-#define pv_lock_table_size(n)  (((n)+BYTE_SIZE-1)/BYTE_SIZE)
+
 
 char    *pv_hash_lock_table;
-#define pv_hash_lock_table_size(n)  (((n)+BYTE_SIZE-1)/BYTE_SIZE)
+
 
 /*
  *     First and last physical addresses that we maintain any information
@@ -453,97 +238,13 @@ static struct vm_object kpml4obj_object_store;
 static struct vm_object kpdptobj_object_store;
 
 /*
- *     Index into pv_head table, its lock bits, and the modify/reference and managed bits
- */
-
-#define pa_index(pa)           (i386_btop(pa))
-#define ppn_to_pai(ppn)                ((int)ppn)
-
-#define pai_to_pvh(pai)                (&pv_head_table[pai])
-#define lock_pvh_pai(pai)      bit_lock(pai, (void *)pv_lock_table)
-#define unlock_pvh_pai(pai)    bit_unlock(pai, (void *)pv_lock_table)
-
-static inline uint32_t
-pvhashidx(pmap_t pmap, vm_offset_t va)
-{
-       return ((uint32_t)(uint64_t)pmap ^
-               ((uint32_t)((uint64_t)va >> PAGE_SHIFT) & 0xFFFFFFFF)) &
-              npvhash;
-}
-#define pvhash(idx)            (&pv_hash_table[idx])
-
-#define lock_hash_hash(hash)   bit_lock(hash, (void *)pv_hash_lock_table)
-#define unlock_hash_hash(hash) bit_unlock(hash, (void *)pv_hash_lock_table)
-
-/*
- *     Array of physical page attribites for managed pages.
+ *     Array of physical page attributes for managed pages.
  *     One byte per physical page.
  */
 char           *pmap_phys_attributes;
 unsigned int   last_managed_page = 0;
-#define IS_MANAGED_PAGE(x)                             \
-       ((unsigned int)(x) <= last_managed_page &&      \
-        (pmap_phys_attributes[x] & PHYS_MANAGED))
-
-/*
- *     Physical page attributes.  Copy bits from PTE definition.
- */
-#define        PHYS_MODIFIED   INTEL_PTE_MOD   /* page modified */
-#define        PHYS_REFERENCED INTEL_PTE_REF   /* page referenced */
-#define PHYS_MANAGED   INTEL_PTE_VALID /* page is managed */
-
-/*
- *     Amount of virtual memory mapped by one
- *     page-directory entry.
- */
-#define        PDE_MAPPED_SIZE         (pdetova(1))
 uint64_t pde_mapped_size = PDE_MAPPED_SIZE;
 
-/*
- *     Locking and TLB invalidation
- */
-
-/*
- *     Locking Protocols: (changed 2/2007 JK)
- *
- *     There are two structures in the pmap module that need locking:
- *     the pmaps themselves, and the per-page pv_lists (which are locked
- *     by locking the pv_lock_table entry that corresponds to the pv_head
- *     for the list in question.)  Most routines want to lock a pmap and
- *     then do operations in it that require pv_list locking -- however
- *     pmap_remove_all and pmap_copy_on_write operate on a physical page
- *     basis and want to do the locking in the reverse order, i.e. lock
- *     a pv_list and then go through all the pmaps referenced by that list.
- *
- *      The system wide pmap lock has been removed. Now, paths take a lock
- *      on the pmap before changing its 'shape' and the reverse order lockers
- *      (coming in by phys ppn) take a lock on the corresponding pv and then
- *      retest to be sure nothing changed during the window before they locked
- *      and can then run up/down the pv lists holding the list lock. This also
- *      lets the pmap layer run (nearly completely) interrupt enabled, unlike
- *      previously.
- */
-
-/*
- * PV locking
- */
-
-#define LOCK_PVH(index)        {               \
-       mp_disable_preemption();        \
-       lock_pvh_pai(index);            \
-}
-
-#define UNLOCK_PVH(index) {            \
-       unlock_pvh_pai(index);          \
-       mp_enable_preemption();         \
-}
-/*
- * PV hash locking
- */
-
-#define LOCK_PV_HASH(hash)         lock_hash_hash(hash)
-#define UNLOCK_PV_HASH(hash)       unlock_hash_hash(hash)
-
 unsigned pmap_memory_region_count;
 unsigned pmap_memory_region_current;
 
@@ -562,8 +263,6 @@ pd_entry_t  commpage64_pde;
 
 struct zone    *pmap_zone;             /* zone of pmap structures */
 
-int            pmap_debug = 0;         /* flag for debugging prints */
-
 unsigned int   inuse_ptepages_count = 0;
 
 addr64_t       kernel64_cr3;
@@ -585,170 +284,6 @@ static int        nkpt;
 pt_entry_t     *DMAP1, *DMAP2;
 caddr_t         DADDR1;
 caddr_t         DADDR2;
-
-/*
- * unlinks the pv_hashed_entry_t pvh from the singly linked hash chain.
- * properly deals with the anchor.
- * must be called with the hash locked, does not unlock it
- */
-
-static inline void 
-pmap_pvh_unlink(pv_hashed_entry_t pvh)
-{
-       pv_hashed_entry_t       curh;
-       pv_hashed_entry_t       *pprevh;
-       int                     pvhash_idx;
-
-       CHK_NPVHASH();
-       pvhash_idx = pvhashidx(pvh->pmap, pvh->va);
-
-       pprevh = pvhash(pvhash_idx);
-
-#if PV_DEBUG
-       if (NULL == *pprevh)
-               panic("pvh_unlink null anchor"); /* JK DEBUG */
-#endif
-       curh = *pprevh;
-
-       while (PV_HASHED_ENTRY_NULL != curh) {
-               if (pvh == curh)
-                       break;
-               pprevh = &curh->nexth;
-               curh = curh->nexth;
-       }
-       if (PV_HASHED_ENTRY_NULL == curh) panic("pmap_pvh_unlink no pvh");
-       *pprevh = pvh->nexth;
-       return;
-}
-
-static inline void
-pv_hash_add(pv_hashed_entry_t  pvh_e,
-           pv_rooted_entry_t   pv_h)
-{
-       pv_hashed_entry_t       *hashp;
-       int                     pvhash_idx;
-
-       CHK_NPVHASH();
-       pvhash_idx = pvhashidx(pvh_e->pmap, pvh_e->va);
-       LOCK_PV_HASH(pvhash_idx);
-       insque(&pvh_e->qlink, &pv_h->qlink);
-       hashp = pvhash(pvhash_idx);
-#if PV_DEBUG
-       if (NULL==hashp)
-               panic("pv_hash_add(%p) null hash bucket", pvh_e);
-#endif
-       pvh_e->nexth = *hashp;
-       *hashp = pvh_e;
-       UNLOCK_PV_HASH(pvhash_idx);
-}
-
-static inline void
-pv_hash_remove(pv_hashed_entry_t pvh_e)
-{
-       int                     pvhash_idx;
-
-       CHK_NPVHASH();
-       pvhash_idx = pvhashidx(pvh_e->pmap,pvh_e->va);
-       LOCK_PV_HASH(pvhash_idx);
-       remque(&pvh_e->qlink);
-       pmap_pvh_unlink(pvh_e);
-       UNLOCK_PV_HASH(pvhash_idx);
-} 
-
-/*
- * Remove pv list entry.
- * Called with pv_head_table entry locked.
- * Returns pv entry to be freed (or NULL).
- */
-static inline pv_hashed_entry_t
-pmap_pv_remove(pmap_t          pmap,
-              vm_map_offset_t  vaddr,
-              ppnum_t          ppn)
-{
-       pv_hashed_entry_t       pvh_e;
-       pv_rooted_entry_t       pv_h;
-       pv_hashed_entry_t       *pprevh;
-       int                     pvhash_idx;
-       uint32_t                pv_cnt;
-
-       pvh_e = PV_HASHED_ENTRY_NULL;
-       pv_h = pai_to_pvh(ppn_to_pai(ppn));
-       if (pv_h->pmap == PMAP_NULL)
-               panic("pmap_pv_remove(%p,%llu,%u): null pv_list!",
-                     pmap, vaddr, ppn);
-
-       if (pv_h->va == vaddr && pv_h->pmap == pmap) {
-               /*
-                * Header is the pv_rooted_entry.
-                * We can't free that. If there is a queued
-                * entry after this one we remove that
-                * from the ppn queue, we remove it from the hash chain
-                * and copy it to the rooted entry. Then free it instead.
-                */
-               pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);
-               if (pv_h != (pv_rooted_entry_t) pvh_e) {
-                       /*
-                        * Entry queued to root, remove this from hash
-                        * and install as new root.
-                        */
-                       CHK_NPVHASH();
-                       pvhash_idx = pvhashidx(pvh_e->pmap, pvh_e->va);
-                       LOCK_PV_HASH(pvhash_idx);
-                       remque(&pvh_e->qlink);
-                       pprevh = pvhash(pvhash_idx);
-                       if (PV_HASHED_ENTRY_NULL == *pprevh) {
-                               panic("pmap_pv_remove(%p,%llu,%u): "
-                                     "empty hash, removing rooted",
-                                     pmap, vaddr, ppn);
-                       }
-                       pmap_pvh_unlink(pvh_e);
-                       UNLOCK_PV_HASH(pvhash_idx);
-                       pv_h->pmap = pvh_e->pmap;
-                       pv_h->va = pvh_e->va;   /* dispose of pvh_e */
-               } else {
-                       /* none queued after rooted */
-                       pv_h->pmap = PMAP_NULL;
-                       pvh_e = PV_HASHED_ENTRY_NULL;
-               }
-       } else {
-               /*
-                * not removing rooted pv. find it on hash chain, remove from
-                * ppn queue and hash chain and free it
-                */
-               CHK_NPVHASH();
-               pvhash_idx = pvhashidx(pmap, vaddr);
-               LOCK_PV_HASH(pvhash_idx);
-               pprevh = pvhash(pvhash_idx);
-               if (PV_HASHED_ENTRY_NULL == *pprevh) {
-                       panic("pmap_pv_remove(%p,%llu,%u): empty hash",
-                             pmap, vaddr, ppn);
-               }
-               pvh_e = *pprevh;
-               pmap_pv_hashlist_walks++;
-               pv_cnt = 0;
-               while (PV_HASHED_ENTRY_NULL != pvh_e) {
-                       pv_cnt++;
-                       if (pvh_e->pmap == pmap &&
-                           pvh_e->va == vaddr &&
-                           pvh_e->ppn == ppn)
-                               break;
-                       pprevh = &pvh_e->nexth;
-                       pvh_e = pvh_e->nexth;
-               }
-               if (PV_HASHED_ENTRY_NULL == pvh_e)
-                       panic("pmap_pv_remove(%p,%llu,%u): pv not on hash",
-                        pmap, vaddr, ppn);
-               pmap_pv_hashlist_cnts += pv_cnt;
-               if (pmap_pv_hashlist_max < pv_cnt)
-                       pmap_pv_hashlist_max = pv_cnt;
-               *pprevh = pvh_e->nexth;
-               remque(&pvh_e->qlink);
-               UNLOCK_PV_HASH(pvhash_idx);
-       }
-
-       return pvh_e;
-}
-
 /*
  * for legacy, returns the address of the pde entry.
  * for 64 bit, causes the pdpt page containing the pde entry to be mapped,
@@ -1463,147 +998,6 @@ pmap_reference(pmap_t    p)
        }
 }
 
-/*
- *     Remove a range of hardware page-table entries.
- *     The entries given are the first (inclusive)
- *     and last (exclusive) entries for the VM pages.
- *     The virtual address is the va for the first pte.
- *
- *     The pmap must be locked.
- *     If the pmap is not the kernel pmap, the range must lie
- *     entirely within one pte-page.  This is NOT checked.
- *     Assumes that the pte-page exists.
- */
-
-void
-pmap_remove_range(
-       pmap_t                  pmap,
-       vm_map_offset_t         start_vaddr,
-       pt_entry_t              *spte,
-       pt_entry_t              *epte)
-{
-       pt_entry_t              *cpte;
-       pv_hashed_entry_t       pvh_et = PV_HASHED_ENTRY_NULL;
-       pv_hashed_entry_t       pvh_eh = PV_HASHED_ENTRY_NULL;
-       pv_hashed_entry_t       pvh_e;
-       int                     pvh_cnt = 0;
-       int                     num_removed, num_unwired, num_found;
-       int                     pai;
-       pmap_paddr_t            pa;
-       vm_map_offset_t         vaddr;
-
-       num_removed = 0;
-       num_unwired = 0;
-       num_found   = 0;
-
-       /* invalidate the PTEs first to "freeze" them */
-       for (cpte = spte, vaddr = start_vaddr;
-            cpte < epte;
-            cpte++, vaddr += PAGE_SIZE_64) {
-
-               pa = pte_to_pa(*cpte);
-               if (pa == 0)
-                       continue;
-               num_found++;
-
-               if (iswired(*cpte))
-                       num_unwired++;
-
-               pai = pa_index(pa);
-
-               if (!IS_MANAGED_PAGE(pai)) {
-                       /*
-                        *      Outside range of managed physical memory.
-                        *      Just remove the mappings.
-                        */
-                       pmap_store_pte(cpte, 0);
-                       continue;
-               }
-
-               /* invalidate the PTE */ 
-               pmap_update_pte(cpte, *cpte, (*cpte & ~INTEL_PTE_VALID));
-       }
-
-       if (num_found == 0) {
-               /* nothing was changed: we're done */
-               goto update_counts;
-       }
-
-       /* propagate the invalidates to other CPUs */
-
-       PMAP_UPDATE_TLBS(pmap, start_vaddr, vaddr);
-
-       for (cpte = spte, vaddr = start_vaddr;
-            cpte < epte;
-            cpte++, vaddr += PAGE_SIZE_64) {
-
-               pa = pte_to_pa(*cpte);
-               if (pa == 0)
-                       continue;
-
-               pai = pa_index(pa);
-
-               LOCK_PVH(pai);
-
-               pa = pte_to_pa(*cpte);
-               if (pa == 0) {
-                       UNLOCK_PVH(pai);
-                       continue;
-               }
-               num_removed++;
-
-               /*
-                * Get the modify and reference bits, then
-                * nuke the entry in the page table
-                */
-               /* remember reference and change */
-               pmap_phys_attributes[pai] |=
-                       (char) (*cpte & (PHYS_MODIFIED | PHYS_REFERENCED));
-               /* completely invalidate the PTE */
-               pmap_store_pte(cpte, 0);
-
-               /*
-                * Remove the mapping from the pvlist for this physical page.
-                */
-               pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t) pai);
-
-               UNLOCK_PVH(pai);
-
-               if (pvh_e != PV_HASHED_ENTRY_NULL) {
-                       pvh_e->qlink.next = (queue_entry_t) pvh_eh;
-                       pvh_eh = pvh_e;
-
-                       if (pvh_et == PV_HASHED_ENTRY_NULL) {
-                               pvh_et = pvh_e;
-                       }
-                       pvh_cnt++;
-               }
-       } /* for loop */
-
-       if (pvh_eh != PV_HASHED_ENTRY_NULL) {
-               PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
-       }
-update_counts:
-       /*
-        *      Update the counts
-        */
-#if TESTING
-       if (pmap->stats.resident_count < num_removed)
-               panic("pmap_remove_range: resident_count");
-#endif
-       assert(pmap->stats.resident_count >= num_removed);
-       OSAddAtomic(-num_removed,  &pmap->stats.resident_count);
-
-#if TESTING
-       if (pmap->stats.wired_count < num_unwired)
-               panic("pmap_remove_range: wired_count");
-#endif
-       assert(pmap->stats.wired_count >= num_unwired);
-       OSAddAtomic(-num_unwired,  &pmap->stats.wired_count);
-
-       return;
-}
-
 /*
  *     Remove phys addr if mapped in specified map
  *
@@ -1618,274 +1012,6 @@ pmap_remove_some_phys(
 
 }
 
-/*
- *     Remove the given range of addresses
- *     from the specified map.
- *
- *     It is assumed that the start and end are properly
- *     rounded to the hardware page size.
- */
-void
-pmap_remove(
-       pmap_t          map,
-       addr64_t        s64,
-       addr64_t        e64)
-{
-       pt_entry_t     *pde;
-       pt_entry_t     *spte, *epte;
-       addr64_t        l64;
-       uint64_t        deadline;
-
-       pmap_intr_assert();
-
-       if (map == PMAP_NULL || s64 == e64)
-               return;
-
-       PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
-                  map,
-                  (uint32_t) (s64 >> 32), s64,
-                  (uint32_t) (e64 >> 32), e64);
-
-
-       PMAP_LOCK(map);
-
-#if 0
-       /*
-        * Check that address range in the kernel does not overlap the stacks.
-        * We initialize local static min/max variables once to avoid making
-        * 2 function calls for every remove. Note also that these functions
-        * both return 0 before kernel stacks have been initialized, and hence
-        * the panic is not triggered in this case.
-        */
-       if (map == kernel_pmap) {
-               static vm_offset_t kernel_stack_min = 0;
-               static vm_offset_t kernel_stack_max = 0;
-
-               if (kernel_stack_min == 0) {
-                       kernel_stack_min = min_valid_stack_address();
-                       kernel_stack_max = max_valid_stack_address();
-               }
-               if ((kernel_stack_min <= s64 && s64 < kernel_stack_max) ||
-                   (kernel_stack_min < e64 && e64 <= kernel_stack_max))
-                       panic("pmap_remove() attempted in kernel stack");
-       }
-#else
-
-       /*
-        * The values of kernel_stack_min and kernel_stack_max are no longer
-        * relevant now that we allocate kernel stacks in the kernel map,
-        * so the old code above no longer applies.  If we wanted to check that
-        * we weren't removing a mapping of a page in a kernel stack we'd 
-        * mark the PTE with an unused bit and check that here.
-        */
-
-#endif
-
-       deadline = rdtsc64() + max_preemption_latency_tsc;
-
-       while (s64 < e64) {
-               l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
-               if (l64 > e64)
-                       l64 = e64;
-               pde = pmap_pde(map, s64);
-
-               if (pde && (*pde & INTEL_PTE_VALID)) {
-                       if (*pde & INTEL_PTE_PS) {
-                               /*
-                                * If we're removing a superpage, pmap_remove_range()
-                                * must work on level 2 instead of level 1; and we're
-                                * only passing a single level 2 entry instead of a
-                                * level 1 range.
-                                */
-                               spte = pde;
-                               epte = spte+1; /* excluded */
-                       } else {
-                               spte = pmap_pte(map, (s64 & ~(pde_mapped_size - 1)));
-                               spte = &spte[ptenum(s64)];
-                               epte = &spte[intel_btop(l64 - s64)];
-                       }
-                       pmap_remove_range(map, s64, spte, epte);
-               }
-               s64 = l64;
-               pde++;
-
-               if (s64 < e64 && rdtsc64() >= deadline) {
-                       PMAP_UNLOCK(map)
-                       PMAP_LOCK(map)
-                       deadline = rdtsc64() + max_preemption_latency_tsc;
-               }
-       }
-
-       PMAP_UNLOCK(map);
-
-       PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END,
-                  map, 0, 0, 0, 0);
-
-}
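Annotation: the deadline logic in the removed pmap_remove() bounds how long the pmap lock is held by briefly dropping and retaking it whenever a TSC deadline passes. The shape of that pattern, in a portable userland sketch:

    #include <pthread.h>
    #include <stdint.h>
    #include <time.h>

    static uint64_t now_ns(void)
    {
        struct timespec ts;
        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
    }

    #define MAX_LATENCY_NS 20000ULL     /* mirrors MAX_PREEMPTION_LATENCY_NS */

    /* Process n items under lock, but never hold the lock past the
     * deadline: drop it, let waiters in, retake it, and rearm. */
    static void process_range(pthread_mutex_t *lock, int *work, int n)
    {
        pthread_mutex_lock(lock);
        uint64_t deadline = now_ns() + MAX_LATENCY_NS;
        for (int i = 0; i < n; i++) {
            work[i] = 0;                    /* stand-in for one PTE range */
            if (i + 1 < n && now_ns() >= deadline) {
                pthread_mutex_unlock(lock); /* window for other lockers */
                pthread_mutex_lock(lock);
                deadline = now_ns() + MAX_LATENCY_NS;
            }
        }
        pthread_mutex_unlock(lock);
    }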
-
-/*
- *     Routine:        pmap_page_protect
- *
- *     Function:
- *             Lower the permission for all mappings to a given
- *             page.
- */
-void
-pmap_page_protect(
-        ppnum_t         pn,
-       vm_prot_t       prot)
-{
-       pv_hashed_entry_t       pvh_eh = PV_HASHED_ENTRY_NULL;
-       pv_hashed_entry_t       pvh_et = PV_HASHED_ENTRY_NULL;
-       pv_hashed_entry_t       nexth;
-       int                     pvh_cnt = 0;
-       pv_rooted_entry_t       pv_h;
-       pv_rooted_entry_t       pv_e;
-       pv_hashed_entry_t       pvh_e;
-       pt_entry_t              *pte;
-       int                     pai;
-       pmap_t                  pmap;
-       boolean_t               remove;
-
-       pmap_intr_assert();
-       assert(pn != vm_page_fictitious_addr);
-       if (pn == vm_page_guard_addr)
-               return;
-
-       pai = ppn_to_pai(pn);
-
-       if (!IS_MANAGED_PAGE(pai)) {
-               /*
-                *      Not a managed page.
-                */
-               return;
-       }
-       PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START,
-                  pn, prot, 0, 0, 0);
-
-       /*
-        * Determine the new protection.
-        */
-       switch (prot) {
-       case VM_PROT_READ:
-       case VM_PROT_READ | VM_PROT_EXECUTE:
-               remove = FALSE;
-               break;
-       case VM_PROT_ALL:
-               return;         /* nothing to do */
-       default:
-               remove = TRUE;
-               break;
-       }
-
-       pv_h = pai_to_pvh(pai);
-
-       LOCK_PVH(pai);
-
-
-       /*
-        * Walk down PV list, if any, changing or removing all mappings.
-        */
-       if (pv_h->pmap == PMAP_NULL)
-               goto done;
-
-       pv_e = pv_h;
-       pvh_e = (pv_hashed_entry_t) pv_e;       /* cheat */
-
-       do {
-               vm_map_offset_t vaddr;
-
-               pmap = pv_e->pmap;
-               vaddr = pv_e->va;
-               pte = pmap_pte(pmap, vaddr);
-               if (0 == pte) {
-                       panic("pmap_page_protect() "
-                               "pmap=%p pn=0x%x vaddr=0x%llx\n",
-                               pmap, pn, vaddr);
-               }
-               nexth = (pv_hashed_entry_t) queue_next(&pvh_e->qlink);
-
-               /*
-                * Remove the mapping if new protection is NONE
-                * or if write-protecting a kernel mapping.
-                */
-               if (remove || pmap == kernel_pmap) {
-                       /*
-                        * Remove the mapping, collecting dirty bits.
-                        */
-                       pmap_update_pte(pte, *pte, *pte & ~INTEL_PTE_VALID);
-                       PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
-                       pmap_phys_attributes[pai] |=
-                               *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
-                       pmap_store_pte(pte, 0);
-
-#if TESTING
-                       if (pmap->stats.resident_count < 1)
-                               panic("pmap_page_protect: resident_count");
-#endif
-                       assert(pmap->stats.resident_count >= 1);
-                       OSAddAtomic(-1,  &pmap->stats.resident_count);
-
-                       /*
-                        * Deal with the pv_rooted_entry.
-                        */
-
-                       if (pv_e == pv_h) {
-                               /*
-                                * Fix up head later.
-                                */
-                               pv_h->pmap = PMAP_NULL;
-                       } else {
-                               /*
-                                * Delete this entry.
-                                */
-                               pv_hash_remove(pvh_e);
-                               pvh_e->qlink.next = (queue_entry_t) pvh_eh;
-                               pvh_eh = pvh_e;
-
-                               if (pvh_et == PV_HASHED_ENTRY_NULL)
-                                       pvh_et = pvh_e;
-                               pvh_cnt++;
-                       }
-               } else {
-                       /*
-                        * Write-protect.
-                        */
-                       pmap_update_pte(pte, *pte, *pte & ~INTEL_PTE_WRITE);
-                       PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
-               }
-               pvh_e = nexth;
-       } while ((pv_e = (pv_rooted_entry_t) nexth) != pv_h);
-
-
-       /*
-         * If pv_head mapping was removed, fix it up.
-         */
-       if (pv_h->pmap == PMAP_NULL) {
-               pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);
-
-               if (pvh_e != (pv_hashed_entry_t) pv_h) {
-                       pv_hash_remove(pvh_e);
-                       pv_h->pmap = pvh_e->pmap;
-                       pv_h->va = pvh_e->va;
-                       pvh_e->qlink.next = (queue_entry_t) pvh_eh;
-                       pvh_eh = pvh_e;
-
-                       if (pvh_et == PV_HASHED_ENTRY_NULL)
-                               pvh_et = pvh_e;
-                       pvh_cnt++;
-               }
-       }
-       if (pvh_eh != PV_HASHED_ENTRY_NULL) {
-               PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
-       }
-done:
-       UNLOCK_PVH(pai);
-
-       PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END,
-                  0, 0, 0, 0, 0);
-}
-
 
 /*
  *     Routine:
@@ -2019,398 +1145,6 @@ pmap_map_block(
        }
 }
 
-
-/*
- *     Insert the given physical page (p) at
- *     the specified virtual address (v) in the
- *     target physical map with the protection requested.
- *
- *     If specified, the page will be wired down, meaning
- *     that the related pte cannot be reclaimed.
- *
- *     NB:  This is the only routine which MAY NOT lazy-evaluate
- *     or lose information.  That is, this routine must actually
- *     insert this page into the given map NOW.
- */
-void
-pmap_enter(
-       register pmap_t         pmap,
-       vm_map_offset_t         vaddr,
-       ppnum_t                 pn,
-       vm_prot_t               prot,
-       unsigned int            flags,
-       boolean_t               wired)
-{
-       pt_entry_t              *pte;
-       pv_rooted_entry_t       pv_h;
-       int                     pai;
-       pv_hashed_entry_t       pvh_e;
-       pv_hashed_entry_t       pvh_new;
-       pt_entry_t              template;
-       pmap_paddr_t            old_pa;
-       pmap_paddr_t            pa = (pmap_paddr_t) i386_ptob(pn);
-       boolean_t               need_tlbflush = FALSE;
-       boolean_t               set_NX;
-       char                    oattr;
-       boolean_t               old_pa_locked;
-       boolean_t               superpage = flags & VM_MEM_SUPERPAGE;
-       vm_object_t             delpage_pm_obj = NULL;
-       int                     delpage_pde_index = 0;
-
-
-       pmap_intr_assert();
-       assert(pn != vm_page_fictitious_addr);
-       if (pmap_debug)
-               kprintf("pmap_enter(%p,%llu,%u)\n", pmap, vaddr, pn);
-       if (pmap == PMAP_NULL)
-               return;
-       if (pn == vm_page_guard_addr)
-               return;
-
-       PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
-                  pmap,
-                  (uint32_t) (vaddr >> 32), (uint32_t) vaddr,
-                  pn, prot);
-
-       if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
-               set_NX = FALSE;
-       else
-               set_NX = TRUE;
-
-       /*
-        *      Must allocate a new pvlist entry while we're unlocked;
-        *      zalloc may cause pageout (which will lock the pmap system).
-        *      If we determine we need a pvlist entry, we will unlock
-        *      and allocate one.  Then we will retry, throwing away
-        *      the allocated entry later (if we no longer need it).
-        */
-
-       pvh_new = PV_HASHED_ENTRY_NULL;
-Retry:
-       pvh_e = PV_HASHED_ENTRY_NULL;
-
-       PMAP_LOCK(pmap);
-
-       /*
-        *      Expand pmap to include this pte.  Assume that
-        *      pmap is always expanded to include enough hardware
-        *      pages to map one VM page.
-        */
-        if(superpage) {
-               while ((pte = pmap64_pde(pmap, vaddr)) == PD_ENTRY_NULL) {
-                       /* need room for another pde entry */
-                       PMAP_UNLOCK(pmap);
-                       pmap_expand_pdpt(pmap, vaddr);
-                       PMAP_LOCK(pmap);
-               }
-       } else {
-               while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) {
-                       /*
-                        * Must unlock to expand the pmap
-                        * going to grow pde level page(s)
-                        */
-                       PMAP_UNLOCK(pmap);
-                       pmap_expand(pmap, vaddr);
-                       PMAP_LOCK(pmap);
-               }
-       }
-
-       if (superpage && *pte && !(*pte & INTEL_PTE_PS)) {
-               /*
-                * There is still an empty page table mapped that
-                * was used for a previous base page mapping.
-                * Remember the PDE and the PDE index, so that we
-                * can free the page at the end of this function.
-                */
-               delpage_pde_index = (int)pdeidx(pmap, vaddr);
-               delpage_pm_obj = pmap->pm_obj;
-               *pte = 0;
-       }
-
-       old_pa = pte_to_pa(*pte);
-       pai = pa_index(old_pa);
-       old_pa_locked = FALSE;
-
-       /*
-        * if we have a previous managed page, lock the pv entry now. after
-        * we lock it, check to see if someone beat us to the lock and if so
-        * drop the lock
-        */
-       if ((0 != old_pa) && IS_MANAGED_PAGE(pai)) {
-               LOCK_PVH(pai);
-               old_pa_locked = TRUE;
-               old_pa = pte_to_pa(*pte);
-               if (0 == old_pa) {
-                       UNLOCK_PVH(pai);        /* another path beat us to it */
-                       old_pa_locked = FALSE;
-               }
-       }
-
-       /*
-        *      Special case if the incoming physical page is already mapped
-        *      at this address.
-        */
-       if (old_pa == pa) {
-
-               /*
-                *      May be changing its wired attribute or protection
-                */
-
-               template = pa_to_pte(pa) | INTEL_PTE_VALID;
-
-               if (VM_MEM_NOT_CACHEABLE ==
-                   (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT))) {
-                       if (!(flags & VM_MEM_GUARDED))
-                               template |= INTEL_PTE_PTA;
-                       template |= INTEL_PTE_NCACHE;
-               }
-               if (pmap != kernel_pmap)
-                       template |= INTEL_PTE_USER;
-               if (prot & VM_PROT_WRITE)
-                       template |= INTEL_PTE_WRITE;
-
-               if (set_NX)
-                       template |= INTEL_PTE_NX;
-
-               if (wired) {
-                       template |= INTEL_PTE_WIRED;
-                       if (!iswired(*pte))
-                               OSAddAtomic(+1,
-                                       &pmap->stats.wired_count);
-               } else {
-                       if (iswired(*pte)) {
-                               assert(pmap->stats.wired_count >= 1);
-                               OSAddAtomic(-1,
-                                       &pmap->stats.wired_count);
-                       }
-               }
-               if (superpage)          /* this path can not be used */
-                       template |= INTEL_PTE_PS;       /* to change the page size! */
-
-               /* store modified PTE and preserve RC bits */
-               pmap_update_pte(pte, *pte,
-                       template | (*pte & (INTEL_PTE_REF | INTEL_PTE_MOD)));
-               if (old_pa_locked) {
-                       UNLOCK_PVH(pai);
-                       old_pa_locked = FALSE;
-               }
-               need_tlbflush = TRUE;
-               goto Done;
-       }
-
-       /*
-        *      Outline of code from here:
-        *         1) If va was mapped, update TLBs, remove the mapping
-        *            and remove old pvlist entry.
-        *         2) Add pvlist entry for new mapping
-        *         3) Enter new mapping.
-        *
-        *      If the old physical page is not managed step 1) is skipped
-        *      (except for updating the TLBs), and the mapping is
-        *      overwritten at step 3).  If the new physical page is not
-        *      managed, step 2) is skipped.
-        */
-
-       if (old_pa != (pmap_paddr_t) 0) {
-
-               /*
-                *      Don't do anything to pages outside valid memory here.
-                *      Instead convince the code that enters a new mapping
-                *      to overwrite the old one.
-                */
-
-               /* invalidate the PTE */
-               pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_VALID));
-               /* propagate invalidate everywhere */
-               PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
-               /* remember reference and change */
-               oattr = (char) (*pte & (PHYS_MODIFIED | PHYS_REFERENCED));
-               /* completely invalidate the PTE */
-               pmap_store_pte(pte, 0);
-
-               if (IS_MANAGED_PAGE(pai)) {
-#if TESTING
-                       if (pmap->stats.resident_count < 1)
-                               panic("pmap_enter: resident_count");
-#endif
-                       assert(pmap->stats.resident_count >= 1);
-                       OSAddAtomic(-1,
-                               &pmap->stats.resident_count);
-
-                       if (iswired(*pte)) {
-#if TESTING
-                               if (pmap->stats.wired_count < 1)
-                                       panic("pmap_enter: wired_count");
-#endif
-                               assert(pmap->stats.wired_count >= 1);
-                               OSAddAtomic(-1,
-                                       &pmap->stats.wired_count);
-                       }
-                       pmap_phys_attributes[pai] |= oattr;
-
-                       /*
-                        *      Remove the mapping from the pvlist for
-                        *      this physical page.
-                        *      We'll end up with either a rooted pv or a
-                        *      hashed pv
-                        */
-                       pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t) pai);
-
-               } else {
-
-                       /*
-                        *      old_pa is not managed.
-                        *      Do removal part of accounting.
-                        */
-
-                       if (iswired(*pte)) {
-                               assert(pmap->stats.wired_count >= 1);
-                               OSAddAtomic(-1,
-                                       &pmap->stats.wired_count);
-                       }
-               }
-       }
-
-       /*
-        * if we had a previously managed page locked, unlock it now
-        */
-       if (old_pa_locked) {
-               UNLOCK_PVH(pai);
-               old_pa_locked = FALSE;
-       }
-
-       pai = pa_index(pa);     /* now working with new incoming phys page */
-       if (IS_MANAGED_PAGE(pai)) {
-
-               /*
-                *      Step 2) Enter the mapping in the PV list for this
-                *      physical page.
-                */
-               pv_h = pai_to_pvh(pai);
-
-               LOCK_PVH(pai);
-
-               if (pv_h->pmap == PMAP_NULL) {
-                       /*
-                        *      No mappings yet, use rooted pv
-                        */
-                       pv_h->va = vaddr;
-                       pv_h->pmap = pmap;
-                       queue_init(&pv_h->qlink);
-               } else {
-                       /*
-                        *      Add new pv_hashed_entry after header.
-                        */
-                       if ((PV_HASHED_ENTRY_NULL == pvh_e) && pvh_new) {
-                               pvh_e = pvh_new;
-                               pvh_new = PV_HASHED_ENTRY_NULL;
-                       } else if (PV_HASHED_ENTRY_NULL == pvh_e) {
-                               PV_HASHED_ALLOC(pvh_e);
-                               if (PV_HASHED_ENTRY_NULL == pvh_e) {
-                                       /*
-                                        * the pv list is empty. if we are on
-                                        * the kernel pmap we'll use one of
-                                        * the special private kernel pv_e's,
-                                        * else, we need to unlock
-                                        * everything, zalloc a pv_e, and
-                                        * restart bringing in the pv_e with
-                                        * us.
-                                        */
-                                       if (kernel_pmap == pmap) {
-                                               PV_HASHED_KERN_ALLOC(pvh_e);
-                                       } else {
-                                               UNLOCK_PVH(pai);
-                                               PMAP_UNLOCK(pmap);
-                                               pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
-                                               goto Retry;
-                                       }
-                               }
-                       }
-                       if (PV_HASHED_ENTRY_NULL == pvh_e)
-                               panic("pvh_e exhaustion");
-
-                       pvh_e->va = vaddr;
-                       pvh_e->pmap = pmap;
-                       pvh_e->ppn = pn;
-                       pv_hash_add(pvh_e, pv_h);
-
-                       /*
-                        *      Remember that we used the pvlist entry.
-                        */
-                       pvh_e = PV_HASHED_ENTRY_NULL;
-               }
-
-               /*
-                * only count the mapping
-                * for 'managed memory'
-                */
-               OSAddAtomic(+1,  & pmap->stats.resident_count);
-               if (pmap->stats.resident_count > pmap->stats.resident_max) {
-                       pmap->stats.resident_max = pmap->stats.resident_count;
-               }
-       }
-       /*
-        * Step 3) Enter the mapping.
-        *
-        *      Build a template to speed up entering -
-        *      only the pfn changes.
-        */
-       template = pa_to_pte(pa) | INTEL_PTE_VALID;
-
-       if (flags & VM_MEM_NOT_CACHEABLE) {
-               if (!(flags & VM_MEM_GUARDED))
-                       template |= INTEL_PTE_PTA;
-               template |= INTEL_PTE_NCACHE;
-       }
-       if (pmap != kernel_pmap)
-               template |= INTEL_PTE_USER;
-       if (prot & VM_PROT_WRITE)
-               template |= INTEL_PTE_WRITE;
-       if (set_NX)
-               template |= INTEL_PTE_NX;
-       if (wired) {
-               template |= INTEL_PTE_WIRED;
-               OSAddAtomic(+1, &pmap->stats.wired_count);
-       }
-       if (superpage)
-               template |= INTEL_PTE_PS;
-       pmap_store_pte(pte, template);
-
-       /*
-        * If this was a managed page, we delayed unlocking the pv until here
-        * to prevent pmap_page_protect et al. from finding it before the pte
-        * has been stored.
-        */
-       if (IS_MANAGED_PAGE(pai)) {
-               UNLOCK_PVH(pai);
-       }
-Done:
-       if (need_tlbflush == TRUE)
-               PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
-
-       if (pvh_e != PV_HASHED_ENTRY_NULL) {
-               PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1);
-       }
-       if (pvh_new != PV_HASHED_ENTRY_NULL) {
-               PV_HASHED_KERN_FREE_LIST(pvh_new, pvh_new, 1);
-       }
-       PMAP_UNLOCK(pmap);
-
-       if (delpage_pm_obj) {
-               vm_page_t m;
-
-               vm_object_lock(delpage_pm_obj);
-               m = vm_page_lookup(delpage_pm_obj, delpage_pde_index);
-               if (m == VM_PAGE_NULL)
-                       panic("pmap_enter: pte page not in object");
-               VM_PAGE_FREE(m);
-               OSAddAtomic(-1, &inuse_ptepages_count);
-               vm_object_unlock(delpage_pm_obj);
-       }
-
-       PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, 0, 0, 0, 0, 0);
-}
-
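The removed tail of pmap_enter() above builds a PTE template once (validity, cacheability, user/write/NX/wired bits) and stamps it into the page table; only the physical frame number varies per mapping. Below is a minimal user-space sketch of that technique, with invented bit values standing in for the INTEL_PTE_* flags; it is an illustration, not the kernel's code.

#include <stdint.h>
#include <stdio.h>

#define PTE_VALID  (1ULL << 0)        /* stand-in for INTEL_PTE_VALID */
#define PTE_WRITE  (1ULL << 1)        /* stand-in for INTEL_PTE_WRITE */
#define PTE_USER   (1ULL << 2)        /* stand-in for INTEL_PTE_USER  */
#define PTE_NX     (1ULL << 63)       /* stand-in for INTEL_PTE_NX    */
#define PFN_SHIFT  12                 /* 4 KiB pages */

/* Only the frame number differs between entries built from one template. */
static uint64_t
make_pte(uint64_t pfn, uint64_t template)
{
        return (pfn << PFN_SHIFT) | template;
}

int
main(void)
{
        int user = 1, writable = 1, no_exec = 1;
        uint64_t template = PTE_VALID;  /* computed once, reused per page */

        if (user)
                template |= PTE_USER;
        if (writable)
                template |= PTE_WRITE;
        if (no_exec)
                template |= PTE_NX;

        for (uint64_t pfn = 100; pfn < 103; pfn++)
                printf("pfn %3llu -> pte %#018llx\n",
                    (unsigned long long)pfn,
                    (unsigned long long)make_pte(pfn, template));
        return 0;
}

Computing the attribute bits once keeps the per-page work down to an OR with the shifted frame number, which is why the original comment notes that "only the pfn changes".
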
 /*
  *     Routine:        pmap_change_wiring
  *     Function:       Change the wiring attribute for a map/virtual-address
@@ -3341,96 +2075,6 @@ phys_page_exists(ppnum_t pn)
        return TRUE;
 }
 
-void
-mapping_free_prime(void)
-{
-       int                     i;
-       pv_hashed_entry_t       pvh_e;
-       pv_hashed_entry_t       pvh_eh;
-       pv_hashed_entry_t       pvh_et;
-       int                     pv_cnt;
-
-       pv_cnt = 0;
-       pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
-       for (i = 0; i < (5 * PV_HASHED_ALLOC_CHUNK); i++) {
-               pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
-
-               pvh_e->qlink.next = (queue_entry_t)pvh_eh;
-               pvh_eh = pvh_e;
-
-               if (pvh_et == PV_HASHED_ENTRY_NULL)
-                       pvh_et = pvh_e;
-               pv_cnt++;
-       }
-       PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
-
-       pv_cnt = 0;
-       pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
-       for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK; i++) {
-               pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
-
-               pvh_e->qlink.next = (queue_entry_t)pvh_eh;
-               pvh_eh = pvh_e;
-
-               if (pvh_et == PV_HASHED_ENTRY_NULL)
-                       pvh_et = pvh_e;
-               pv_cnt++;
-       }
-       PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
-
-}
-
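mapping_free_prime() above pre-populates the pv-entry pools in bulk: it chains a whole chunk of freshly zalloc'd entries locally, then donates the chain with a single head/tail/count handoff (PV_HASHED_FREE_LIST and PV_HASHED_KERN_FREE_LIST), so the shared list is touched once per chunk rather than once per entry. A rough stand-alone sketch of that pattern follows; every name in it is invented, and malloc stands in for zalloc.

#include <stdlib.h>

struct entry {
        struct entry *next;
};

struct freelist {
        struct entry *head;
        int           count;
};

/* Splice a pre-built chain [eh .. et] of cnt entries onto the list in O(1). */
static void
freelist_donate(struct freelist *fl, struct entry *eh, struct entry *et, int cnt)
{
        et->next = fl->head;
        fl->head = eh;
        fl->count += cnt;
}

/* Allocate a chunk, chain it locally, then donate the chain in one handoff. */
static void
freelist_prime(struct freelist *fl, int chunk)
{
        struct entry *eh = NULL;        /* head of the local chain */
        struct entry *et = NULL;        /* tail: the first entry pushed */
        int cnt = 0;

        for (int i = 0; i < chunk; i++) {
                struct entry *e = malloc(sizeof(*e));
                if (e == NULL)
                        break;
                e->next = eh;
                eh = e;
                if (et == NULL)
                        et = e;
                cnt++;
        }
        if (cnt > 0)
                freelist_donate(fl, eh, et, cnt);
}

In the kernel the donate step runs under the free list's own synchronization, which is why the batching matters: the shared state is updated once per chunk instead of once per allocation.
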
-void
-mapping_adjust(void)
-{
-       pv_hashed_entry_t       pvh_e;
-       pv_hashed_entry_t       pvh_eh;
-       pv_hashed_entry_t       pvh_et;
-       int                     pv_cnt;
-       int                     i;
-
-       if (mapping_adjust_call == NULL) {
-               thread_call_setup(&mapping_adjust_call_data,
-                                 (thread_call_func_t) mapping_adjust,
-                                 (thread_call_param_t) NULL);
-               mapping_adjust_call = &mapping_adjust_call_data;
-       }
-
-       pv_cnt = 0;
-       pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
-       if (pv_hashed_kern_free_count < PV_HASHED_KERN_LOW_WATER_MARK) {
-               for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK; i++) {
-                       pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
-
-                       pvh_e->qlink.next = (queue_entry_t)pvh_eh;
-                       pvh_eh = pvh_e;
-
-                       if (pvh_et == PV_HASHED_ENTRY_NULL)
-                               pvh_et = pvh_e;
-                       pv_cnt++;
-               }
-               PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
-       }
-
-       pv_cnt = 0;
-       pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
-       if (pv_hashed_free_count < PV_HASHED_LOW_WATER_MARK) {
-               for (i = 0; i < PV_HASHED_ALLOC_CHUNK; i++) {
-                       pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
-
-                       pvh_e->qlink.next = (queue_entry_t)pvh_eh;
-                       pvh_eh = pvh_e;
-
-                       if (pvh_et == PV_HASHED_ENTRY_NULL)
-                               pvh_et = pvh_e;
-                       pv_cnt++;
-               }
-               PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
-       }
-       mappingrecurse = 0;
-}
-
-
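mapping_adjust(), run as a thread call, applies a low-water-mark policy: whenever a pool has dipped below its threshold it is refilled by a fixed chunk, and mappingrecurse is cleared so the next shortfall can queue another pass. A compressed, self-contained sketch of that policy follows; the thresholds, names, and refill stub are all illustrative.

#define LOW_WATER_MARK   64
#define ALLOC_CHUNK     256

struct pool {
        int count;                      /* entries currently free */
};

static volatile int mappingrecurse;     /* nonzero while a refill is queued */

static void
pool_refill(struct pool *p, int chunk)
{
        p->count += chunk;              /* stands in for the zalloc loop */
}

static void
pool_adjust(struct pool *p)
{
        if (p->count < LOW_WATER_MARK)
                pool_refill(p, ALLOC_CHUNK);
        mappingrecurse = 0;             /* next shortfall may queue again */
}
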
 void
 pmap_switch(pmap_t tpmap)
 {
index 4c5b5f07d0a4e9684ba471ba5890dc62783cf9f5..0e8749b2b2cdd56e4f5ab73936f95c78113721e6 100644 (file)
@@ -260,3 +260,9 @@ getval(
        *val = 1;
        return (NUM);
 }
+
+boolean_t
+PE_imgsrc_mount_supported(void)
+{
+       return TRUE;
+}
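
PE_imgsrc_mount_supported() is a platform capability probe that this platform answers unconditionally in the affirmative. A hypothetical caller might gate an image-source mount on it as sketched below; only the probe itself comes from the diff, while the caller and the stand-in boolean_t are invented.

typedef int boolean_t;                  /* stand-in for the kernel type */
#define TRUE  1
#define FALSE 0

boolean_t
PE_imgsrc_mount_supported(void)
{
        return TRUE;                    /* mirrors the definition above */
}

static int
try_imgsrc_mount(void)                  /* invented name */
{
        if (!PE_imgsrc_mount_supported())
                return 0;               /* platform opts out */
        /* ...locate and mount the image source here... */
        return 1;
}
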
index 6143820965c83e6ff5b3d7d5ce3f6ff0d895de6f..5a8fa5eff404a38874ddba79d85b82fcc97574ba 100644 (file)
@@ -184,15 +184,15 @@ void PE_init_platform(boolean_t vm_initialized, void * _args)
                /* Hack! FIXME.. */ 
         outb(0x21, 0xff);   /* Maskout all interrupts Pic1 */
         outb(0xa1, 0xff);   /* Maskout all interrupts Pic2 */
         if (PE_state.deviceTreeHead) {
             DTInit(PE_state.deviceTreeHead);
-    }
+        }
 
         pe_identify_machine(args);
     } else {
         pe_init_debug();
     }
+
 }
 
 void PE_create_console( void )
@@ -274,5 +274,8 @@ PE_stub_poll_input(__unused unsigned int options, char * c)
 int (*PE_poll_input)(unsigned int options, char * c)
        = PE_stub_poll_input;
 
-
-
+boolean_t
+PE_reboot_on_panic(void)
+{
+       return FALSE;
+}
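
PE_reboot_on_panic() gives the panic path a per-platform hook for automatic reboot, and this stub opts out. A hypothetical consumer is sketched below with stand-in types; only PE_reboot_on_panic() itself appears in the diff, and the rest is invented for illustration.

typedef int boolean_t;                  /* stand-in for the kernel type */
#define TRUE  1
#define FALSE 0

boolean_t
PE_reboot_on_panic(void)
{
        return FALSE;                   /* this platform waits instead */
}

static void
panic_epilogue(void)                    /* invented name */
{
        if (PE_reboot_on_panic()) {
                /* ...reset the machine... */
        } else {
                /* ...spin, awaiting a debugger or manual reset... */
        }
}
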
index 3dd73dad7c419ea4a9287eb5f07255a6920c606a..7c3596f9f3a10fc252e23503a34a215395e5b67d 100644 (file)
@@ -189,6 +189,8 @@ typedef struct PE_Video       PE_Video;
 
 extern void initialize_screen(PE_Video *, unsigned int);
 
+extern void dim_screen(void);
+
 extern int PE_current_console(
        PE_Video *info);
 
@@ -275,6 +277,12 @@ extern void PE_cpu_machine_quiesce(
 
 extern void pe_init_debug(void);
 
+extern boolean_t PE_imgsrc_mount_supported(void);
+
+
+#if KERNEL_PRIVATE
+boolean_t PE_reboot_on_panic(void);
+#endif
 
 __END_DECLS